{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "cb769429",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pickle\n",
    "import pandas as pd\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "6a9a965b",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Generator whose pairwise generations are being labelled.\n",
    "# Alternative value previously assigned here: 'llama-7b'.\n",
    "model = 'pythia-1b'\n",
    "\n",
    "split = 'test'\n",
    "\n",
    "# Shard indices 1..7 appear as the `{gpu}` suffix in the file names.\n",
    "gpu_ids = range(1, 8)\n",
    "\n",
    "# gold_scores[k] accumulates scores[k] from every shard, for k in {0, 1}.\n",
    "gold_scores = [[] for _ in range(2)]\n",
    "samples = []\n",
    "for gpu in gpu_ids:\n",
    "    shard_stem = f\"pairwise_generations_{split}_{model}_{gpu}\"\n",
    "    generations_file = f\"{shard_stem}.csv\"\n",
    "    # Only the first column of the header-less CSV is used.\n",
    "    generations = pd.read_csv(generations_file, header=None)[0]\n",
    "    fname = f\"scores_{shard_stem}_gold.pkl\"\n",
    "    # NOTE(review): pickle.load can execute arbitrary code; only load score\n",
    "    # files produced by a trusted pipeline.\n",
    "    with open(fname, 'rb') as f:\n",
    "        scores = pickle.load(f)\n",
    "    # Fail fast on a misaligned shard instead of silently mis-pairing texts\n",
    "    # and scores in the later reshape((-1, 2)) steps.\n",
    "    for i in range(len(gold_scores)):\n",
    "        if len(scores[i]) != len(generations):\n",
    "            raise ValueError(\n",
    "                f\"{fname}: scores[{i}] has {len(scores[i])} entries, \"\n",
    "                f\"expected {len(generations)} to match {generations_file}\"\n",
    "            )\n",
    "    samples.extend(generations)\n",
    "    for i in range(len(gold_scores)):\n",
    "        gold_scores[i].extend(scores[i])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "6314f7f0",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "((27104,), (27104, 2))"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Turn the accumulators into arrays; gold_scores is transposed so that rows\n",
    "# index texts and columns index the two score lists.\n",
    "# NOTE: this cell rebinds its own inputs, so running it a second time\n",
    "# transposes gold_scores again; run it once after the loading cell.\n",
    "gold_scores = np.array(gold_scores).transpose()\n",
    "samples = np.array(samples)\n",
    "(samples.shape, gold_scores.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "a53e57cb",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(array([-1.19453931, -4.4642705 ]), array([2.54230953, 1.70640247]))"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Per-column mean and standard deviation of the scores.\n",
    "gold_scores.mean(axis=0), gold_scores.std(axis=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "da2e96f9",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Standardise each score column: subtract its mean, divide by its std.\n",
    "# Presumably done so both columns contribute on a comparable scale when\n",
    "# they are summed later - TODO confirm.\n",
    "column_mean = gold_scores.mean(axis=0)\n",
    "column_std = gold_scores.std(axis=0)\n",
    "gold_scores = (gold_scores - column_mean) / column_std"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "fbe81799",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Group consecutive texts two at a time: row k holds texts 2k and 2k+1.\n",
    "samples_pairs = samples.reshape(-1, 2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "875dc47b",
   "metadata": {},
   "outputs": [],
   "source": [
    "# For every score column, view the scores as (first, second) pairs and\n",
    "# record whether the first text of each pair scored strictly higher.\n",
    "n_columns = gold_scores.shape[-1]\n",
    "scores_pairs = [gold_scores[:, col].reshape(-1, 2) for col in range(n_columns)]\n",
    "predictions = [pair_scores[:, 0] > pair_scores[:, 1] for pair_scores in scores_pairs]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "82d0ae9b",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(13552,)"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Element-wise agreement between the two columns' pairwise predictions;\n",
    "# only its shape (one entry per pair) is displayed here.\n",
    "np.equal(predictions[0], predictions[1]).shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "04ff7c0c",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Ensemble score of a text = sum of its two score columns, then regrouped\n",
    "# into (first, second) pairs like the texts.\n",
    "summed_scores = gold_scores[:, 0] + gold_scores[:, 1]\n",
    "ensemble_scores = summed_scores.reshape(-1, 2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "97210235",
   "metadata": {},
   "outputs": [],
   "source": [
    "# True where the first text of a pair has the strictly higher ensemble score.\n",
    "ensemble_preds = np.greater(ensemble_scores[:, 0], ensemble_scores[:, 1])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "fb3fd6e5",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Order every pair by ensemble score: a_examples gets the higher-scoring\n",
    "# text, b_examples the other one. On a tie the second text goes to a_examples.\n",
    "a_examples = [\n",
    "    pair[0] if first_wins else pair[1]\n",
    "    for pair, first_wins in zip(samples_pairs, ensemble_preds)\n",
    "]\n",
    "b_examples = [\n",
    "    pair[1] if first_wins else pair[0]\n",
    "    for pair, first_wins in zip(samples_pairs, ensemble_preds)\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 189,
   "id": "f26bdde9",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Two unnamed columns: 0 = a_examples, 1 = b_examples.\n",
    "pair_columns = {0: a_examples, 1: b_examples}\n",
    "df = pd.DataFrame(pair_columns)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 190,
   "id": "d860b625",
   "metadata": {},
   "outputs": [],
   "source": [
    "# NOTE: mode=\"a\" appends to any existing file, so re-running this cell\n",
    "# writes the same rows again.\n",
    "out_fname = f\"ranking_datasets/gm_labeled/{split}.csv\"\n",
    "df.to_csv(\n",
    "    out_fname,\n",
    "    mode=\"a\",\n",
    "    index=False,\n",
    "    header=False,\n",
    "    escapechar='\\\\',\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 164,
   "id": "373f457d",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>\\n\\nHuman: Kenny Rogers and Dolly Parton colla...</td>\n",
       "      <td>\\n\\nHuman: Kenny Rogers and Dolly Parton colla...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>\\n\\nHuman: How can I break up with someone pea...</td>\n",
       "      <td>\\n\\nHuman: How can I break up with someone pea...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>\\n\\nHuman: What are the positive benefits of r...</td>\n",
       "      <td>\\n\\nHuman: What are the positive benefits of r...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>\\n\\nHuman: There's a stray dog wandering aroun...</td>\n",
       "      <td>\\n\\nHuman: There's a stray dog wandering aroun...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>\\n\\nHuman: What do you think of fat people\\n\\n...</td>\n",
       "      <td>\\n\\nHuman: What do you think of fat people\\n\\n...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                   0  \\\n",
       "0  \\n\\nHuman: Kenny Rogers and Dolly Parton colla...   \n",
       "1  \\n\\nHuman: How can I break up with someone pea...   \n",
       "2  \\n\\nHuman: What are the positive benefits of r...   \n",
       "3  \\n\\nHuman: There's a stray dog wandering aroun...   \n",
       "4  \\n\\nHuman: What do you think of fat people\\n\\n...   \n",
       "\n",
       "                                                   1  \n",
       "0  \\n\\nHuman: Kenny Rogers and Dolly Parton colla...  \n",
       "1  \\n\\nHuman: How can I break up with someone pea...  \n",
       "2  \\n\\nHuman: What are the positive benefits of r...  \n",
       "3  \\n\\nHuman: There's a stray dog wandering aroun...  \n",
       "4  \\n\\nHuman: What do you think of fat people\\n\\n...  "
      ]
     },
     "execution_count": 164,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Read back everything accumulated in the output file. The rows were\n",
    "# written with escapechar='\\\\', so the same escapechar is needed here to\n",
    "# undo that escaping instead of keeping the escape characters in the text.\n",
    "df = pd.read_csv(out_fname, header=None, escapechar='\\\\')\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 159,
   "id": "08487eaf",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "279808"
      ]
     },
     "execution_count": 159,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Number of rows currently held in df.\n",
    "df.shape[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 160,
   "id": "f9fffaa0",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Draw a fixed-seed random subset of rows (sampling without replacement).\n",
    "subset_size = 100000\n",
    "subset_seed = 42\n",
    "short_df = df.sample(n=subset_size, random_state=subset_seed)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 161,
   "id": "35ff030f",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "100000"
      ]
     },
     "execution_count": 161,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Number of rows in the subsample.\n",
    "short_df.shape[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 163,
   "id": "9644054d",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Default write mode: this replaces the file at out_fname with the subsample.\n",
    "out_fname = f\"ranking_datasets/gm_labeled/{split}.csv\"\n",
    "csv_options = dict(escapechar='\\\\', header=False, index=False)\n",
    "short_df.to_csv(out_fname, **csv_options)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "eb4d4b8c",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1.932102426627851"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Standard deviation over all entries of ensemble_scores.\n",
    "np.std(ensemble_scores)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "id": "5585e422",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1.9271931703994465"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Same statistic as the previous cell; the stored outputs differ, so the two\n",
    "# cells were evidently run against different ensemble_scores values.\n",
    "ensemble_scores.std(axis=None)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "id": "561eccc0",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.9648238992568243"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# The two literals equal the stored outputs of the two ensemble_scores.std()\n",
    "# cells above; the result is half of their average.\n",
    "# NOTE(review): hard-coded values go stale when the data changes.\n",
    "std_first_run = 1.932102426627851\n",
    "std_second_run = 1.9271931703994465\n",
    "(std_first_run + std_second_run) / 4"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cca65a46",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
