{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "f1850684-b8ae-4dfe-99ec-99dc4ae6ad0f",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import alpaca_eval"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "11b7c5f9-5c67-46a8-9f04-8eb34925a60a",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "242230f4-f20f-434f-87e0-2c151b047848",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>instruction</th>\n",
       "      <th>output_1</th>\n",
       "      <th>generator_1</th>\n",
       "      <th>dataset</th>\n",
       "      <th>output_2</th>\n",
       "      <th>generator_2</th>\n",
       "      <th>datasplit</th>\n",
       "      <th>annotator</th>\n",
       "      <th>preference</th>\n",
       "      <th>price_per_example</th>\n",
       "      <th>time_per_example</th>\n",
       "      <th>raw_completion</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>What are the names of some famous actors that ...</td>\n",
       "      <td>Several famous actors started their careers on...</td>\n",
       "      <td>gpt4_1106_preview</td>\n",
       "      <td>helpful_base</td>\n",
       "      <td>} } } } } verify }:param } } } withObject }pr...</td>\n",
       "      <td>example</td>\n",
       "      <td>eval</td>\n",
       "      <td>weighted_alpaca_eval_gpt4_turbo</td>\n",
       "      <td>1.990014</td>\n",
       "      <td>0.01000</td>\n",
       "      <td>0.179206</td>\n",
       "      <td>{'finish_reason': 'length', 'index': 0, 'logpr...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>How did US states get their names?</td>\n",
       "      <td>The names of U.S. states are derived from a va...</td>\n",
       "      <td>gpt4_1106_preview</td>\n",
       "      <td>helpful_base</td>\n",
       "      <td>} } } } } verify }:param } } } withObject }pr...</td>\n",
       "      <td>example</td>\n",
       "      <td>eval</td>\n",
       "      <td>weighted_alpaca_eval_gpt4_turbo</td>\n",
       "      <td>1.459366</td>\n",
       "      <td>0.01263</td>\n",
       "      <td>0.179206</td>\n",
       "      <td>{'finish_reason': 'length', 'index': 0, 'logpr...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Hi, my sister and her girlfriends want me to p...</td>\n",
       "      <td>Kickball is a fun and simple game that is simi...</td>\n",
       "      <td>gpt4_1106_preview</td>\n",
       "      <td>helpful_base</td>\n",
       "      <td>} } } } } verify }:param } } } withObject }pr...</td>\n",
       "      <td>example</td>\n",
       "      <td>eval</td>\n",
       "      <td>weighted_alpaca_eval_gpt4_turbo</td>\n",
       "      <td>1.066485</td>\n",
       "      <td>0.01220</td>\n",
       "      <td>0.179206</td>\n",
       "      <td>{'finish_reason': 'length', 'index': 0, 'logpr...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>What is some cool music from the 1920s?</td>\n",
       "      <td>The 1920s, often referred to as the \"Roaring T...</td>\n",
       "      <td>gpt4_1106_preview</td>\n",
       "      <td>helpful_base</td>\n",
       "      <td>} } } } } verify }:param } } } withObject }pr...</td>\n",
       "      <td>example</td>\n",
       "      <td>eval</td>\n",
       "      <td>weighted_alpaca_eval_gpt4_turbo</td>\n",
       "      <td>1.881640</td>\n",
       "      <td>0.01200</td>\n",
       "      <td>0.179206</td>\n",
       "      <td>{'finish_reason': 'length', 'index': 0, 'logpr...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>How do I wrap a present neatly?</td>\n",
       "      <td>Wrapping a present neatly can be quite straigh...</td>\n",
       "      <td>gpt4_1106_preview</td>\n",
       "      <td>helpful_base</td>\n",
       "      <td>} } } } } verify }:param } } } withObject }pr...</td>\n",
       "      <td>example</td>\n",
       "      <td>eval</td>\n",
       "      <td>weighted_alpaca_eval_gpt4_turbo</td>\n",
       "      <td>1.948271</td>\n",
       "      <td>0.01061</td>\n",
       "      <td>0.179206</td>\n",
       "      <td>{'finish_reason': 'length', 'index': 0, 'logpr...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                         instruction  \\\n",
       "0  What are the names of some famous actors that ...   \n",
       "1                 How did US states get their names?   \n",
       "2  Hi, my sister and her girlfriends want me to p...   \n",
       "3            What is some cool music from the 1920s?   \n",
       "4                    How do I wrap a present neatly?   \n",
       "\n",
       "                                            output_1        generator_1  \\\n",
       "0  Several famous actors started their careers on...  gpt4_1106_preview   \n",
       "1  The names of U.S. states are derived from a va...  gpt4_1106_preview   \n",
       "2  Kickball is a fun and simple game that is simi...  gpt4_1106_preview   \n",
       "3  The 1920s, often referred to as the \"Roaring T...  gpt4_1106_preview   \n",
       "4  Wrapping a present neatly can be quite straigh...  gpt4_1106_preview   \n",
       "\n",
       "        dataset                                           output_2  \\\n",
       "0  helpful_base   } } } } } verify }:param } } } withObject }pr...   \n",
       "1  helpful_base   } } } } } verify }:param } } } withObject }pr...   \n",
       "2  helpful_base   } } } } } verify }:param } } } withObject }pr...   \n",
       "3  helpful_base   } } } } } verify }:param } } } withObject }pr...   \n",
       "4  helpful_base   } } } } } verify }:param } } } withObject }pr...   \n",
       "\n",
       "  generator_2 datasplit                        annotator  preference  \\\n",
       "0     example      eval  weighted_alpaca_eval_gpt4_turbo    1.990014   \n",
       "1     example      eval  weighted_alpaca_eval_gpt4_turbo    1.459366   \n",
       "2     example      eval  weighted_alpaca_eval_gpt4_turbo    1.066485   \n",
       "3     example      eval  weighted_alpaca_eval_gpt4_turbo    1.881640   \n",
       "4     example      eval  weighted_alpaca_eval_gpt4_turbo    1.948271   \n",
       "\n",
       "   price_per_example  time_per_example  \\\n",
       "0            0.01000          0.179206   \n",
       "1            0.01263          0.179206   \n",
       "2            0.01220          0.179206   \n",
       "3            0.01200          0.179206   \n",
       "4            0.01061          0.179206   \n",
       "\n",
       "                                      raw_completion  \n",
       "0  {'finish_reason': 'length', 'index': 0, 'logpr...  \n",
       "1  {'finish_reason': 'length', 'index': 0, 'logpr...  \n",
       "2  {'finish_reason': 'length', 'index': 0, 'logpr...  \n",
       "3  {'finish_reason': 'length', 'index': 0, 'logpr...  \n",
       "4  {'finish_reason': 'length', 'index': 0, 'logpr...  "
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = pd.read_json(\"example/weighted_alpaca_eval_gpt4_turbo/annotations.json\")\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "49e18c34-e842-4392-adaa-e757b482967d",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "40a3dad2-01f1-40b0-b577-935b9e31df09",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>instruction</th>\n",
       "      <th>output_1</th>\n",
       "      <th>generator_1</th>\n",
       "      <th>output_2</th>\n",
       "      <th>generator_2</th>\n",
       "      <th>annotator</th>\n",
       "      <th>raw_completion</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>What are the names of some famous actors that ...</td>\n",
       "      <td>Several famous actors started their careers on...</td>\n",
       "      <td>gpt4_1106_preview</td>\n",
       "      <td>} } } } } verify }:param } } } withObject }pr...</td>\n",
       "      <td>example</td>\n",
       "      <td>weighted_alpaca_eval_gpt4_turbo</td>\n",
       "      <td>{'finish_reason': 'length', 'index': 0, 'logpr...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>How did US states get their names?</td>\n",
       "      <td>The names of U.S. states are derived from a va...</td>\n",
       "      <td>gpt4_1106_preview</td>\n",
       "      <td>} } } } } verify }:param } } } withObject }pr...</td>\n",
       "      <td>example</td>\n",
       "      <td>weighted_alpaca_eval_gpt4_turbo</td>\n",
       "      <td>{'finish_reason': 'length', 'index': 0, 'logpr...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Hi, my sister and her girlfriends want me to p...</td>\n",
       "      <td>Kickball is a fun and simple game that is simi...</td>\n",
       "      <td>gpt4_1106_preview</td>\n",
       "      <td>} } } } } verify }:param } } } withObject }pr...</td>\n",
       "      <td>example</td>\n",
       "      <td>weighted_alpaca_eval_gpt4_turbo</td>\n",
       "      <td>{'finish_reason': 'length', 'index': 0, 'logpr...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>What is some cool music from the 1920s?</td>\n",
       "      <td>The 1920s, often referred to as the \"Roaring T...</td>\n",
       "      <td>gpt4_1106_preview</td>\n",
       "      <td>} } } } } verify }:param } } } withObject }pr...</td>\n",
       "      <td>example</td>\n",
       "      <td>weighted_alpaca_eval_gpt4_turbo</td>\n",
       "      <td>{'finish_reason': 'length', 'index': 0, 'logpr...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>How do I wrap a present neatly?</td>\n",
       "      <td>Wrapping a present neatly can be quite straigh...</td>\n",
       "      <td>gpt4_1106_preview</td>\n",
       "      <td>} } } } } verify }:param } } } withObject }pr...</td>\n",
       "      <td>example</td>\n",
       "      <td>weighted_alpaca_eval_gpt4_turbo</td>\n",
       "      <td>{'finish_reason': 'length', 'index': 0, 'logpr...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                         instruction  \\\n",
       "0  What are the names of some famous actors that ...   \n",
       "1                 How did US states get their names?   \n",
       "2  Hi, my sister and her girlfriends want me to p...   \n",
       "3            What is some cool music from the 1920s?   \n",
       "4                    How do I wrap a present neatly?   \n",
       "\n",
       "                                            output_1        generator_1  \\\n",
       "0  Several famous actors started their careers on...  gpt4_1106_preview   \n",
       "1  The names of U.S. states are derived from a va...  gpt4_1106_preview   \n",
       "2  Kickball is a fun and simple game that is simi...  gpt4_1106_preview   \n",
       "3  The 1920s, often referred to as the \"Roaring T...  gpt4_1106_preview   \n",
       "4  Wrapping a present neatly can be quite straigh...  gpt4_1106_preview   \n",
       "\n",
       "                                            output_2 generator_2  \\\n",
       "0   } } } } } verify }:param } } } withObject }pr...     example   \n",
       "1   } } } } } verify }:param } } } withObject }pr...     example   \n",
       "2   } } } } } verify }:param } } } withObject }pr...     example   \n",
       "3   } } } } } verify }:param } } } withObject }pr...     example   \n",
       "4   } } } } } verify }:param } } } withObject }pr...     example   \n",
       "\n",
       "                         annotator  \\\n",
       "0  weighted_alpaca_eval_gpt4_turbo   \n",
       "1  weighted_alpaca_eval_gpt4_turbo   \n",
       "2  weighted_alpaca_eval_gpt4_turbo   \n",
       "3  weighted_alpaca_eval_gpt4_turbo   \n",
       "4  weighted_alpaca_eval_gpt4_turbo   \n",
       "\n",
       "                                      raw_completion  \n",
       "0  {'finish_reason': 'length', 'index': 0, 'logpr...  \n",
       "1  {'finish_reason': 'length', 'index': 0, 'logpr...  \n",
       "2  {'finish_reason': 'length', 'index': 0, 'logpr...  \n",
       "3  {'finish_reason': 'length', 'index': 0, 'logpr...  \n",
       "4  {'finish_reason': 'length', 'index': 0, 'logpr...  "
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = df[['instruction', 'output_1', 'generator_1',  'output_2', 'generator_2', 'annotator', \n",
    "         'raw_completion']]\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "1411bf3d-0f3d-4410-8359-8944a6bcf679",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'finish_reason': 'length',\n",
       " 'index': 0,\n",
       " 'logprobs': {'content': [{'token': 'm',\n",
       "    'bytes': [109],\n",
       "    'logprob': -0.010449888000000001,\n",
       "    'top_logprobs': [{'token': 'm',\n",
       "      'bytes': [109],\n",
       "      'logprob': -0.010449888000000001},\n",
       "     {'token': 'M', 'bytes': [77], 'logprob': -4.6069846},\n",
       "     {'token': 'The', 'bytes': [84, 104, 101], 'logprob': -8.3615055},\n",
       "     {'token': 'Since',\n",
       "      'bytes': [83, 105, 110, 99, 101],\n",
       "      'logprob': -9.512136},\n",
       "     {'token': 'Based',\n",
       "      'bytes': [66, 97, 115, 101, 100],\n",
       "      'logprob': -10.232525}]}],\n",
       "  'refusal': None},\n",
       " 'message': {'content': 'm',\n",
       "  'role': 'assistant',\n",
       "  'function_call': None,\n",
       "  'tool_calls': None,\n",
       "  'refusal': None},\n",
       " 'text': 'm',\n",
       " 'total_tokens': 1000.0}"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df['raw_completion'].iloc[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "136e0b4c-47e4-4715-b4c0-d0e1c5841f17",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "b638d2f7-9011-407f-9ee1-eafb79f99c83",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>instruction</th>\n",
       "      <th>output_1</th>\n",
       "      <th>generator_1</th>\n",
       "      <th>output_2</th>\n",
       "      <th>generator_2</th>\n",
       "      <th>annotator</th>\n",
       "      <th>raw_completion</th>\n",
       "      <th>is_switched</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>What are the names of some famous actors that ...</td>\n",
       "      <td>Several famous actors started their careers on...</td>\n",
       "      <td>gpt4_1106_preview</td>\n",
       "      <td>} } } } } verify }:param } } } withObject }pr...</td>\n",
       "      <td>example</td>\n",
       "      <td>weighted_alpaca_eval_gpt4_turbo</td>\n",
       "      <td>{'finish_reason': 'length', 'index': 0, 'logpr...</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>How did US states get their names?</td>\n",
       "      <td>The names of U.S. states are derived from a va...</td>\n",
       "      <td>gpt4_1106_preview</td>\n",
       "      <td>} } } } } verify }:param } } } withObject }pr...</td>\n",
       "      <td>example</td>\n",
       "      <td>weighted_alpaca_eval_gpt4_turbo</td>\n",
       "      <td>{'finish_reason': 'length', 'index': 0, 'logpr...</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Hi, my sister and her girlfriends want me to p...</td>\n",
       "      <td>Kickball is a fun and simple game that is simi...</td>\n",
       "      <td>gpt4_1106_preview</td>\n",
       "      <td>} } } } } verify }:param } } } withObject }pr...</td>\n",
       "      <td>example</td>\n",
       "      <td>weighted_alpaca_eval_gpt4_turbo</td>\n",
       "      <td>{'finish_reason': 'length', 'index': 0, 'logpr...</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>What is some cool music from the 1920s?</td>\n",
       "      <td>The 1920s, often referred to as the \"Roaring T...</td>\n",
       "      <td>gpt4_1106_preview</td>\n",
       "      <td>} } } } } verify }:param } } } withObject }pr...</td>\n",
       "      <td>example</td>\n",
       "      <td>weighted_alpaca_eval_gpt4_turbo</td>\n",
       "      <td>{'finish_reason': 'length', 'index': 0, 'logpr...</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>How do I wrap a present neatly?</td>\n",
       "      <td>Wrapping a present neatly can be quite straigh...</td>\n",
       "      <td>gpt4_1106_preview</td>\n",
       "      <td>} } } } } verify }:param } } } withObject }pr...</td>\n",
       "      <td>example</td>\n",
       "      <td>weighted_alpaca_eval_gpt4_turbo</td>\n",
       "      <td>{'finish_reason': 'length', 'index': 0, 'logpr...</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                         instruction  \\\n",
       "0  What are the names of some famous actors that ...   \n",
       "1                 How did US states get their names?   \n",
       "2  Hi, my sister and her girlfriends want me to p...   \n",
       "3            What is some cool music from the 1920s?   \n",
       "4                    How do I wrap a present neatly?   \n",
       "\n",
       "                                            output_1        generator_1  \\\n",
       "0  Several famous actors started their careers on...  gpt4_1106_preview   \n",
       "1  The names of U.S. states are derived from a va...  gpt4_1106_preview   \n",
       "2  Kickball is a fun and simple game that is simi...  gpt4_1106_preview   \n",
       "3  The 1920s, often referred to as the \"Roaring T...  gpt4_1106_preview   \n",
       "4  Wrapping a present neatly can be quite straigh...  gpt4_1106_preview   \n",
       "\n",
       "                                            output_2 generator_2  \\\n",
       "0   } } } } } verify }:param } } } withObject }pr...     example   \n",
       "1   } } } } } verify }:param } } } withObject }pr...     example   \n",
       "2   } } } } } verify }:param } } } withObject }pr...     example   \n",
       "3   } } } } } verify }:param } } } withObject }pr...     example   \n",
       "4   } } } } } verify }:param } } } withObject }pr...     example   \n",
       "\n",
       "                         annotator  \\\n",
       "0  weighted_alpaca_eval_gpt4_turbo   \n",
       "1  weighted_alpaca_eval_gpt4_turbo   \n",
       "2  weighted_alpaca_eval_gpt4_turbo   \n",
       "3  weighted_alpaca_eval_gpt4_turbo   \n",
       "4  weighted_alpaca_eval_gpt4_turbo   \n",
       "\n",
       "                                      raw_completion  is_switched  \n",
       "0  {'finish_reason': 'length', 'index': 0, 'logpr...         True  \n",
       "1  {'finish_reason': 'length', 'index': 0, 'logpr...         True  \n",
       "2  {'finish_reason': 'length', 'index': 0, 'logpr...         True  \n",
       "3  {'finish_reason': 'length', 'index': 0, 'logpr...        False  \n",
       "4  {'finish_reason': 'length', 'index': 0, 'logpr...        False  "
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# That's the actual randomization that AlpacaEval uses, but feel free to use whatever in your case.\n",
    "arr_is_switched = df.apply(\n",
    "    lambda x: alpaca_eval.utils.random_seeded_choice(\n",
    "        seed=f\"is_switched_outputs{x['instruction']}0\", # some instruction dependent seed\n",
    "        choices=[False, True],\n",
    "    ),\n",
    "    axis=1,\n",
    ")\n",
    "\n",
    "df['is_switched'] = arr_is_switched\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2309e2cd-2ad2-4513-852b-c1b6a68658a1",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "8f495e86-22ae-4906-8328-55b3a00e7d20",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "is_switched\n",
       "True     406\n",
       "False    399\n",
       "Name: count, dtype: int64"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df['is_switched'].value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "810ae2e6-3311-4715-8540-d3d6b32d5a55",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "77e63e96-962f-4d60-8b1e-740ca2310bda",
   "metadata": {},
   "outputs": [],
   "source": [
    "# arr_is_switched"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "07af74e2-8831-4b34-8ed4-c354c80ed3d7",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Option 1 for undoing randomization\n",
    "# This is the derandomization you need if you prefer derandomizing the raw_completion before computing preferences.\n",
    "# Benefit: can be computed before the preference and will be easier to interpret from the annotations.json\n",
    "def derandomize_tokens_inplace(x):\n",
    "    if x is None: return\n",
    "    # note that we only replace the top logprobs token as this is what `logprob_parser` uses\n",
    "    for el in x[\"logprobs\"][\"content\"][0][\"top_logprobs\"]:\n",
    "        if el[\"token\"] == \"m\":\n",
    "            el[\"token\"] = \"M\"\n",
    "        elif el[\"token\"] == \"M\":\n",
    "            el[\"token\"] = \"m\"\n",
    "\n",
    "for i in range(len(df)):\n",
    "    if df.iloc[i][\"is_switched\"]:\n",
    "        derandomize_tokens_inplace(df.iloc[i][\"raw_completion\"])\n",
    "\n",
    "# If you did everything correctly, then df would have the same format as yours. I.e. \n",
    "# ['instruction', 'output_1', 'generator_1',  'output_2', 'generator_2', 'annotator', 'raw_completion'] with undone randomization "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b64886fe-6f5b-487b-a0df-c2b6a1e828c0",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "236c42aa-7e39-47c0-9468-a6997c1c7186",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>instruction</th>\n",
       "      <th>output_1</th>\n",
       "      <th>generator_1</th>\n",
       "      <th>output_2</th>\n",
       "      <th>generator_2</th>\n",
       "      <th>annotator</th>\n",
       "      <th>raw_completion</th>\n",
       "      <th>is_switched</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>What are the names of some famous actors that ...</td>\n",
       "      <td>Several famous actors started their careers on...</td>\n",
       "      <td>gpt4_1106_preview</td>\n",
       "      <td>} } } } } verify }:param } } } withObject }pr...</td>\n",
       "      <td>example</td>\n",
       "      <td>weighted_alpaca_eval_gpt4_turbo</td>\n",
       "      <td>{'finish_reason': 'length', 'index': 0, 'logpr...</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>How did US states get their names?</td>\n",
       "      <td>The names of U.S. states are derived from a va...</td>\n",
       "      <td>gpt4_1106_preview</td>\n",
       "      <td>} } } } } verify }:param } } } withObject }pr...</td>\n",
       "      <td>example</td>\n",
       "      <td>weighted_alpaca_eval_gpt4_turbo</td>\n",
       "      <td>{'finish_reason': 'length', 'index': 0, 'logpr...</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Hi, my sister and her girlfriends want me to p...</td>\n",
       "      <td>Kickball is a fun and simple game that is simi...</td>\n",
       "      <td>gpt4_1106_preview</td>\n",
       "      <td>} } } } } verify }:param } } } withObject }pr...</td>\n",
       "      <td>example</td>\n",
       "      <td>weighted_alpaca_eval_gpt4_turbo</td>\n",
       "      <td>{'finish_reason': 'length', 'index': 0, 'logpr...</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>What is some cool music from the 1920s?</td>\n",
       "      <td>The 1920s, often referred to as the \"Roaring T...</td>\n",
       "      <td>gpt4_1106_preview</td>\n",
       "      <td>} } } } } verify }:param } } } withObject }pr...</td>\n",
       "      <td>example</td>\n",
       "      <td>weighted_alpaca_eval_gpt4_turbo</td>\n",
       "      <td>{'finish_reason': 'length', 'index': 0, 'logpr...</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>How do I wrap a present neatly?</td>\n",
       "      <td>Wrapping a present neatly can be quite straigh...</td>\n",
       "      <td>gpt4_1106_preview</td>\n",
       "      <td>} } } } } verify }:param } } } withObject }pr...</td>\n",
       "      <td>example</td>\n",
       "      <td>weighted_alpaca_eval_gpt4_turbo</td>\n",
       "      <td>{'finish_reason': 'length', 'index': 0, 'logpr...</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                         instruction  \\\n",
       "0  What are the names of some famous actors that ...   \n",
       "1                 How did US states get their names?   \n",
       "2  Hi, my sister and her girlfriends want me to p...   \n",
       "3            What is some cool music from the 1920s?   \n",
       "4                    How do I wrap a present neatly?   \n",
       "\n",
       "                                            output_1        generator_1  \\\n",
       "0  Several famous actors started their careers on...  gpt4_1106_preview   \n",
       "1  The names of U.S. states are derived from a va...  gpt4_1106_preview   \n",
       "2  Kickball is a fun and simple game that is simi...  gpt4_1106_preview   \n",
       "3  The 1920s, often referred to as the \"Roaring T...  gpt4_1106_preview   \n",
       "4  Wrapping a present neatly can be quite straigh...  gpt4_1106_preview   \n",
       "\n",
       "                                            output_2 generator_2  \\\n",
       "0   } } } } } verify }:param } } } withObject }pr...     example   \n",
       "1   } } } } } verify }:param } } } withObject }pr...     example   \n",
       "2   } } } } } verify }:param } } } withObject }pr...     example   \n",
       "3   } } } } } verify }:param } } } withObject }pr...     example   \n",
       "4   } } } } } verify }:param } } } withObject }pr...     example   \n",
       "\n",
       "                         annotator  \\\n",
       "0  weighted_alpaca_eval_gpt4_turbo   \n",
       "1  weighted_alpaca_eval_gpt4_turbo   \n",
       "2  weighted_alpaca_eval_gpt4_turbo   \n",
       "3  weighted_alpaca_eval_gpt4_turbo   \n",
       "4  weighted_alpaca_eval_gpt4_turbo   \n",
       "\n",
       "                                      raw_completion  is_switched  \n",
       "0  {'finish_reason': 'length', 'index': 0, 'logpr...         True  \n",
       "1  {'finish_reason': 'length', 'index': 0, 'logpr...         True  \n",
       "2  {'finish_reason': 'length', 'index': 0, 'logpr...         True  \n",
       "3  {'finish_reason': 'length', 'index': 0, 'logpr...        False  \n",
       "4  {'finish_reason': 'length', 'index': 0, 'logpr...        False  "
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3f54de76-06b0-4c79-8aa8-da48f39cd4ef",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "1d119b4f-615e-46f6-a097-fed1b15a0f86",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'win_rate': 76.91979180386511, 'standard_error': 0.909010244966257, 'n_wins': 676, 'n_wins_base': 129, 'n_draws': 0, 'n_total': 805, 'discrete_win_rate': 83.97515527950311, 'length_controlled_winrate': 86.45780691307944, 'lc_standard_error': 0.1418000511342794}\n"
     ]
    }
   ],
   "source": [
    "# Step 4: Extract preference\n",
    "# Gets the preference of \"m\" vs \"M\". This can also be coded in a few lines. For historical reasons it returns values in 1 and 2. \n",
    "df[\"preference\"] = df[\"raw_completion\"].apply(lambda x: alpaca_eval.completion_parsers.logprob_parser(x, \n",
    "                                                            numerator_token=\"m\",\n",
    "                                                            denominator_tokens=[\"m\", \"M\"],\n",
    "                                                            is_binarize=False)[0] \n",
    "                                              if x is not None else float(\"nan\"))\n",
    "\n",
    "# Option 2 for undoing randomization\n",
    "# This is the derandomization that you need if you apply it after computing the preferences.\n",
    "# Benefit: simpler when there are many different potential prompts and when caching. This is what AlpacaEval uses.\n",
    "# Only do the following if you didn't derandomize the raw_completion before. \n",
    "# df[\"preference\"] = np.where(df[\"is_switched\"], 3-df[\"preference\"], df[\"preference\"])\n",
    "\n",
    "# Step 5 & 6: Length control and get result\n",
    "metrics = alpaca_eval.metrics.get_length_controlled_winrate(df, \n",
    "                                                            save_weights_dir=None,\n",
    "                                                            # adds 'glm_preference' to df\n",
    "                                                            is_add_glm_preference_inplace=True)\n",
    "print(metrics)\n",
    "# {'win_rate': 28.989564293901843,\n",
    " # 'standard_error': 1.397245743554741,\n",
    " # 'n_wins': 223,\n",
    " # 'n_wins_base': 580,\n",
    " # 'n_draws': 0,\n",
    " # 'n_total': 803,\n",
    " # 'discrete_win_rate': 27.770859277708592,\n",
    " # 'length_controlled_winrate': 40.4779345913862}\n",
    "\n",
    "# Save df as annotations.json\n",
    "\n",
    "# df.to_json(\"annotations.json\", orient=\"records\", indent=2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7aa13d17-7452-4817-9343-374380f33e89",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "71e923c7-a618-4387-a7b0-0d86aa78240a",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "84be1fe3-2dde-415a-b828-0820bc583f97",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
