{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>question</th>\n",
       "      <th>answer</th>\n",
       "      <th>example</th>\n",
       "      <th>sorted_index</th>\n",
       "      <th>model_avg_acc</th>\n",
       "      <th>difficulty</th>\n",
       "      <th>difficulty_std</th>\n",
       "      <th>discrimination</th>\n",
       "      <th>guessing</th>\n",
       "      <th>feasibility</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>$18 is shared among 3 boys. The eldest added $...</td>\n",
       "      <td>Each boy gets $18/3 = $&lt;&lt;18/3=6&gt;&gt;6.\\nThe eldes...</td>\n",
       "      <td>$18 is shared among 3 boys. The eldest added $...</td>\n",
       "      <td>0</td>\n",
       "      <td>0.063063</td>\n",
       "      <td>3.899119</td>\n",
       "      <td>3.819679</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>100 people apply for a job at Google. Of the p...</td>\n",
       "      <td>The number of people that receive interviews i...</td>\n",
       "      <td>100 people apply for a job at Google. Of the p...</td>\n",
       "      <td>1</td>\n",
       "      <td>0.108108</td>\n",
       "      <td>0.954773</td>\n",
       "      <td>2.386477</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>15 gallons of gas were equally divided into 5 ...</td>\n",
       "      <td>15 gallons = 120 pints\\n120/5 = &lt;&lt;120/5=24&gt;&gt;24...</td>\n",
       "      <td>15 gallons of gas were equally divided into 5 ...</td>\n",
       "      <td>2</td>\n",
       "      <td>0.027027</td>\n",
       "      <td>13.105968</td>\n",
       "      <td>7.122644</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>20% of 50 people think horse #2 will win the b...</td>\n",
       "      <td>20% of 50 people are pulling for horse #2 so t...</td>\n",
       "      <td>20% of 50 people think horse #2 will win the b...</td>\n",
       "      <td>3</td>\n",
       "      <td>0.027027</td>\n",
       "      <td>9.983851</td>\n",
       "      <td>5.564509</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>200 pounds of carrots are to be distributed to...</td>\n",
       "      <td>The restaurants need 40 * 2 = &lt;&lt;40*2=80&gt;&gt;80 lb...</td>\n",
       "      <td>200 pounds of carrots are to be distributed to...</td>\n",
       "      <td>4</td>\n",
       "      <td>0.027027</td>\n",
       "      <td>9.433002</td>\n",
       "      <td>5.058318</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1314</th>\n",
       "      <td>You can buy a movie super ticket for $20 that ...</td>\n",
       "      <td>The super ticket costs $20 and for $1 extra I ...</td>\n",
       "      <td>You can buy a movie super ticket for $20 that ...</td>\n",
       "      <td>1314</td>\n",
       "      <td>0.108108</td>\n",
       "      <td>1.571185</td>\n",
       "      <td>3.099334</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1315</th>\n",
       "      <td>Zack's locker is half as big as Timothy's lock...</td>\n",
       "      <td>Zack's locker is 5*4=&lt;&lt;5*4=20&gt;&gt;20 cubic inches...</td>\n",
       "      <td>Zack's locker is half as big as Timothy's lock...</td>\n",
       "      <td>1315</td>\n",
       "      <td>0.045045</td>\n",
       "      <td>6.490000</td>\n",
       "      <td>4.519505</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1316</th>\n",
       "      <td>Zaid spends 1/4 of his salary on rent, 1/3 on ...</td>\n",
       "      <td>When Zaid spend 1/4 of his salary on rent, he ...</td>\n",
       "      <td>Zaid spends 1/4 of his salary on rent, 1/3 on ...</td>\n",
       "      <td>1316</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>22.798063</td>\n",
       "      <td>9.723783</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1317</th>\n",
       "      <td>Zeke's baseball team has 7 more players than C...</td>\n",
       "      <td>If Carlton's team has 13 players, Zeke's team ...</td>\n",
       "      <td>Zeke's baseball team has 7 more players than C...</td>\n",
       "      <td>1317</td>\n",
       "      <td>0.054054</td>\n",
       "      <td>4.752416</td>\n",
       "      <td>3.795304</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1318</th>\n",
       "      <td>Zoey and Sydney are having a watermelon seed s...</td>\n",
       "      <td>Zoey spits a total distance of 400 feet becaus...</td>\n",
       "      <td>Zoey and Sydney are having a watermelon seed s...</td>\n",
       "      <td>1318</td>\n",
       "      <td>0.081081</td>\n",
       "      <td>2.552320</td>\n",
       "      <td>2.989116</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1319 rows × 10 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                               question  \\\n",
       "0     $18 is shared among 3 boys. The eldest added $...   \n",
       "1     100 people apply for a job at Google. Of the p...   \n",
       "2     15 gallons of gas were equally divided into 5 ...   \n",
       "3     20% of 50 people think horse #2 will win the b...   \n",
       "4     200 pounds of carrots are to be distributed to...   \n",
       "...                                                 ...   \n",
       "1314  You can buy a movie super ticket for $20 that ...   \n",
       "1315  Zack's locker is half as big as Timothy's lock...   \n",
       "1316  Zaid spends 1/4 of his salary on rent, 1/3 on ...   \n",
       "1317  Zeke's baseball team has 7 more players than C...   \n",
       "1318  Zoey and Sydney are having a watermelon seed s...   \n",
       "\n",
       "                                                 answer  \\\n",
       "0     Each boy gets $18/3 = $<<18/3=6>>6.\\nThe eldes...   \n",
       "1     The number of people that receive interviews i...   \n",
       "2     15 gallons = 120 pints\\n120/5 = <<120/5=24>>24...   \n",
       "3     20% of 50 people are pulling for horse #2 so t...   \n",
       "4     The restaurants need 40 * 2 = <<40*2=80>>80 lb...   \n",
       "...                                                 ...   \n",
       "1314  The super ticket costs $20 and for $1 extra I ...   \n",
       "1315  Zack's locker is 5*4=<<5*4=20>>20 cubic inches...   \n",
       "1316  When Zaid spend 1/4 of his salary on rent, he ...   \n",
       "1317  If Carlton's team has 13 players, Zeke's team ...   \n",
       "1318  Zoey spits a total distance of 400 feet becaus...   \n",
       "\n",
       "                                                example  sorted_index  \\\n",
       "0     $18 is shared among 3 boys. The eldest added $...             0   \n",
       "1     100 people apply for a job at Google. Of the p...             1   \n",
       "2     15 gallons of gas were equally divided into 5 ...             2   \n",
       "3     20% of 50 people think horse #2 will win the b...             3   \n",
       "4     200 pounds of carrots are to be distributed to...             4   \n",
       "...                                                 ...           ...   \n",
       "1314  You can buy a movie super ticket for $20 that ...          1314   \n",
       "1315  Zack's locker is half as big as Timothy's lock...          1315   \n",
       "1316  Zaid spends 1/4 of his salary on rent, 1/3 on ...          1316   \n",
       "1317  Zeke's baseball team has 7 more players than C...          1317   \n",
       "1318  Zoey and Sydney are having a watermelon seed s...          1318   \n",
       "\n",
       "      model_avg_acc  difficulty  difficulty_std  discrimination  guessing  \\\n",
       "0          0.063063    3.899119        3.819679             NaN       NaN   \n",
       "1          0.108108    0.954773        2.386477             NaN       NaN   \n",
       "2          0.027027   13.105968        7.122644             NaN       NaN   \n",
       "3          0.027027    9.983851        5.564509             NaN       NaN   \n",
       "4          0.027027    9.433002        5.058318             NaN       NaN   \n",
       "...             ...         ...             ...             ...       ...   \n",
       "1314       0.108108    1.571185        3.099334             NaN       NaN   \n",
       "1315       0.045045    6.490000        4.519505             NaN       NaN   \n",
       "1316       0.000000   22.798063        9.723783             NaN       NaN   \n",
       "1317       0.054054    4.752416        3.795304             NaN       NaN   \n",
       "1318       0.081081    2.552320        2.989116             NaN       NaN   \n",
       "\n",
       "      feasibility  \n",
       "0             NaN  \n",
       "1             NaN  \n",
       "2             NaN  \n",
       "3             NaN  \n",
       "4             NaN  \n",
       "...           ...  \n",
       "1314          NaN  \n",
       "1315          NaN  \n",
       "1316          NaN  \n",
       "1317          NaN  \n",
       "1318          NaN  \n",
       "\n",
       "[1319 rows x 10 columns]"
      ]
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from datasets import load_dataset\n",
    "\n",
    "# Load the \"v5_1\" configuration of Easy2Hard-GSM8K; downloads are cached\n",
    "# under ./cache so later runs can reuse them.\n",
    "gsm8k_splits = load_dataset(\n",
    "    \"mcding-org/Easy2Hard-GSM8K\", \"v5_1\", cache_dir=\"./cache\"\n",
    ")\n",
    "\n",
    "# Select one entry by name and convert it to a pandas DataFrame.\n",
    "df = gsm8k_splits[\"model_1PL_lr_0.05_epochs_3200_sha_9\"].to_pandas()\n",
    "\n",
    "# Last expression of the cell: rich preview of the raw frame.\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>rating</th>\n",
       "      <th>rating_std</th>\n",
       "      <th>rating_quantile</th>\n",
       "      <th>question</th>\n",
       "      <th>answer</th>\n",
       "      <th>example</th>\n",
       "      <th>model_avg_acc</th>\n",
       "      <th>unnorm_rating</th>\n",
       "      <th>unnorm_rating_std</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.238991</td>\n",
       "      <td>0.138851</td>\n",
       "      <td>0.198635</td>\n",
       "      <td>$18 is shared among 3 boys. The eldest added $...</td>\n",
       "      <td>Each boy gets $18/3 = $&lt;&lt;18/3=6&gt;&gt;6.\\nThe eldes...</td>\n",
       "      <td>$18 is shared among 3 boys. The eldest added $...</td>\n",
       "      <td>0.063063</td>\n",
       "      <td>3.899119</td>\n",
       "      <td>3.819679</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0.131960</td>\n",
       "      <td>0.086752</td>\n",
       "      <td>0.045489</td>\n",
       "      <td>100 people apply for a job at Google. Of the p...</td>\n",
       "      <td>The number of people that receive interviews i...</td>\n",
       "      <td>100 people apply for a job at Google. Of the p...</td>\n",
       "      <td>0.108108</td>\n",
       "      <td>0.954773</td>\n",
       "      <td>2.386477</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0.573673</td>\n",
       "      <td>0.258918</td>\n",
       "      <td>0.684610</td>\n",
       "      <td>15 gallons of gas were equally divided into 5 ...</td>\n",
       "      <td>15 gallons = 120 pints\\n120/5 = &lt;&lt;120/5=24&gt;&gt;24...</td>\n",
       "      <td>15 gallons of gas were equally divided into 5 ...</td>\n",
       "      <td>0.027027</td>\n",
       "      <td>13.105968</td>\n",
       "      <td>7.122644</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0.460179</td>\n",
       "      <td>0.202278</td>\n",
       "      <td>0.523124</td>\n",
       "      <td>20% of 50 people think horse #2 will win the b...</td>\n",
       "      <td>20% of 50 people are pulling for horse #2 so t...</td>\n",
       "      <td>20% of 50 people think horse #2 will win the b...</td>\n",
       "      <td>0.027027</td>\n",
       "      <td>9.983851</td>\n",
       "      <td>5.564509</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0.440155</td>\n",
       "      <td>0.183877</td>\n",
       "      <td>0.501895</td>\n",
       "      <td>200 pounds of carrots are to be distributed to...</td>\n",
       "      <td>The restaurants need 40 * 2 = &lt;&lt;40*2=80&gt;&gt;80 lb...</td>\n",
       "      <td>200 pounds of carrots are to be distributed to...</td>\n",
       "      <td>0.027027</td>\n",
       "      <td>9.433002</td>\n",
       "      <td>5.058318</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1314</th>\n",
       "      <td>0.154367</td>\n",
       "      <td>0.112665</td>\n",
       "      <td>0.072024</td>\n",
       "      <td>You can buy a movie super ticket for $20 that ...</td>\n",
       "      <td>The super ticket costs $20 and for $1 extra I ...</td>\n",
       "      <td>You can buy a movie super ticket for $20 that ...</td>\n",
       "      <td>0.108108</td>\n",
       "      <td>1.571185</td>\n",
       "      <td>3.099334</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1315</th>\n",
       "      <td>0.333173</td>\n",
       "      <td>0.164290</td>\n",
       "      <td>0.360121</td>\n",
       "      <td>Zack's locker is half as big as Timothy's lock...</td>\n",
       "      <td>Zack's locker is 5*4=&lt;&lt;5*4=20&gt;&gt;20 cubic inches...</td>\n",
       "      <td>Zack's locker is half as big as Timothy's lock...</td>\n",
       "      <td>0.045045</td>\n",
       "      <td>6.490000</td>\n",
       "      <td>4.519505</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1316</th>\n",
       "      <td>0.925994</td>\n",
       "      <td>0.353473</td>\n",
       "      <td>0.994693</td>\n",
       "      <td>Zaid spends 1/4 of his salary on rent, 1/3 on ...</td>\n",
       "      <td>When Zaid spend 1/4 of his salary on rent, he ...</td>\n",
       "      <td>Zaid spends 1/4 of his salary on rent, 1/3 on ...</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>22.798063</td>\n",
       "      <td>9.723783</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1317</th>\n",
       "      <td>0.270009</td>\n",
       "      <td>0.137965</td>\n",
       "      <td>0.249431</td>\n",
       "      <td>Zeke's baseball team has 7 more players than C...</td>\n",
       "      <td>If Carlton's team has 13 players, Zeke's team ...</td>\n",
       "      <td>Zeke's baseball team has 7 more players than C...</td>\n",
       "      <td>0.054054</td>\n",
       "      <td>4.752416</td>\n",
       "      <td>3.795304</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1318</th>\n",
       "      <td>0.190033</td>\n",
       "      <td>0.108659</td>\n",
       "      <td>0.123578</td>\n",
       "      <td>Zoey and Sydney are having a watermelon seed s...</td>\n",
       "      <td>Zoey spits a total distance of 400 feet becaus...</td>\n",
       "      <td>Zoey and Sydney are having a watermelon seed s...</td>\n",
       "      <td>0.081081</td>\n",
       "      <td>2.552320</td>\n",
       "      <td>2.989116</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1319 rows × 9 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "        rating  rating_std  rating_quantile  \\\n",
       "0     0.238991    0.138851         0.198635   \n",
       "1     0.131960    0.086752         0.045489   \n",
       "2     0.573673    0.258918         0.684610   \n",
       "3     0.460179    0.202278         0.523124   \n",
       "4     0.440155    0.183877         0.501895   \n",
       "...        ...         ...              ...   \n",
       "1314  0.154367    0.112665         0.072024   \n",
       "1315  0.333173    0.164290         0.360121   \n",
       "1316  0.925994    0.353473         0.994693   \n",
       "1317  0.270009    0.137965         0.249431   \n",
       "1318  0.190033    0.108659         0.123578   \n",
       "\n",
       "                                               question  \\\n",
       "0     $18 is shared among 3 boys. The eldest added $...   \n",
       "1     100 people apply for a job at Google. Of the p...   \n",
       "2     15 gallons of gas were equally divided into 5 ...   \n",
       "3     20% of 50 people think horse #2 will win the b...   \n",
       "4     200 pounds of carrots are to be distributed to...   \n",
       "...                                                 ...   \n",
       "1314  You can buy a movie super ticket for $20 that ...   \n",
       "1315  Zack's locker is half as big as Timothy's lock...   \n",
       "1316  Zaid spends 1/4 of his salary on rent, 1/3 on ...   \n",
       "1317  Zeke's baseball team has 7 more players than C...   \n",
       "1318  Zoey and Sydney are having a watermelon seed s...   \n",
       "\n",
       "                                                 answer  \\\n",
       "0     Each boy gets $18/3 = $<<18/3=6>>6.\\nThe eldes...   \n",
       "1     The number of people that receive interviews i...   \n",
       "2     15 gallons = 120 pints\\n120/5 = <<120/5=24>>24...   \n",
       "3     20% of 50 people are pulling for horse #2 so t...   \n",
       "4     The restaurants need 40 * 2 = <<40*2=80>>80 lb...   \n",
       "...                                                 ...   \n",
       "1314  The super ticket costs $20 and for $1 extra I ...   \n",
       "1315  Zack's locker is 5*4=<<5*4=20>>20 cubic inches...   \n",
       "1316  When Zaid spend 1/4 of his salary on rent, he ...   \n",
       "1317  If Carlton's team has 13 players, Zeke's team ...   \n",
       "1318  Zoey spits a total distance of 400 feet becaus...   \n",
       "\n",
       "                                                example  model_avg_acc  \\\n",
       "0     $18 is shared among 3 boys. The eldest added $...       0.063063   \n",
       "1     100 people apply for a job at Google. Of the p...       0.108108   \n",
       "2     15 gallons of gas were equally divided into 5 ...       0.027027   \n",
       "3     20% of 50 people think horse #2 will win the b...       0.027027   \n",
       "4     200 pounds of carrots are to be distributed to...       0.027027   \n",
       "...                                                 ...            ...   \n",
       "1314  You can buy a movie super ticket for $20 that ...       0.108108   \n",
       "1315  Zack's locker is half as big as Timothy's lock...       0.045045   \n",
       "1316  Zaid spends 1/4 of his salary on rent, 1/3 on ...       0.000000   \n",
       "1317  Zeke's baseball team has 7 more players than C...       0.054054   \n",
       "1318  Zoey and Sydney are having a watermelon seed s...       0.081081   \n",
       "\n",
       "      unnorm_rating  unnorm_rating_std  \n",
       "0          3.899119           3.819679  \n",
       "1          0.954773           2.386477  \n",
       "2         13.105968           7.122644  \n",
       "3          9.983851           5.564509  \n",
       "4          9.433002           5.058318  \n",
       "...             ...                ...  \n",
       "1314       1.571185           3.099334  \n",
       "1315       6.490000           4.519505  \n",
       "1316      22.798063           9.723783  \n",
       "1317       4.752416           3.795304  \n",
       "1318       2.552320           2.989116  \n",
       "\n",
       "[1319 rows x 9 columns]"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from datasets import Dataset, DatasetDict\n",
    "\n",
    "\n",
    "def normalize_ratings(raw):\n",
    "    \"\"\"Return a new frame with difficulty rescaled into rating columns.\n",
    "\n",
    "    Args:\n",
    "        raw: DataFrame holding ``difficulty`` and ``difficulty_std`` plus the\n",
    "            ``sorted_index``, ``discrimination``, ``guessing`` and\n",
    "            ``feasibility`` columns, which are dropped.\n",
    "\n",
    "    Returns:\n",
    "        A new DataFrame (``raw`` itself is not modified) whose columns are, in\n",
    "        order: ``rating`` (min-max scaled ``difficulty``), ``rating_std``\n",
    "        (``difficulty_std`` divided by the difficulty range),\n",
    "        ``rating_quantile`` (percentile rank of ``rating``), every remaining\n",
    "        input column in its original order, then ``unnorm_rating`` and\n",
    "        ``unnorm_rating_std`` (the renamed ``difficulty`` and\n",
    "        ``difficulty_std``).\n",
    "\n",
    "    Raises:\n",
    "        KeyError: if any of the columns named above is missing from ``raw``.\n",
    "    \"\"\"\n",
    "    lowest = raw[\"difficulty\"].min()\n",
    "    # The range is computed once; it scales both the rating and its std.\n",
    "    spread = raw[\"difficulty\"].max() - lowest\n",
    "    rating = (raw[\"difficulty\"] - lowest) / spread\n",
    "\n",
    "    # assign/drop/rename each return a new frame, so the caller's frame (and\n",
    "    # whatever an earlier cell displayed) stays as it was.\n",
    "    rated = (\n",
    "        raw.assign(\n",
    "            rating=rating,\n",
    "            rating_std=raw[\"difficulty_std\"] / spread,\n",
    "            rating_quantile=rating.rank(pct=True),\n",
    "        )\n",
    "        .drop(columns=[\"sorted_index\", \"discrimination\", \"guessing\", \"feasibility\"])\n",
    "        .rename(\n",
    "            columns={\n",
    "                \"difficulty\": \"unnorm_rating\",\n",
    "                \"difficulty_std\": \"unnorm_rating_std\",\n",
    "            }\n",
    "        )\n",
    "    )\n",
    "\n",
    "    # Normalized columns go first, the unnormalized originals last.\n",
    "    leading = [\"rating\", \"rating_std\", \"rating_quantile\"]\n",
    "    trailing = [\"unnorm_rating\", \"unnorm_rating_std\"]\n",
    "    middle = [c for c in rated.columns if c not in leading + trailing]\n",
    "    return rated[leading + middle + trailing]\n",
    "\n",
    "\n",
    "# `df` is rebound to the normalized frame because the next cell reads it.\n",
    "# The input no longer has a `difficulty` column afterwards, so re-run the\n",
    "# loading cell before re-running this one.\n",
    "df = normalize_ratings(df)\n",
    "\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "1de108003732407bbcd72e6004415950",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Saving the dataset (0/1 shards):   0%|          | 0/1319 [00:00<?, ? examples/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Convert the frame (with a fresh 0..n-1 index) into a Dataset, wrap it as\n",
    "# the single \"eval\" split, and write the result to disk.\n",
    "eval_split = Dataset.from_pandas(df.reset_index(drop=True))\n",
    "DatasetDict({\"eval\": eval_split}).save_to_disk(\"./prepub/GSM8K\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>question</th>\n",
       "      <th>choices</th>\n",
       "      <th>answerKey</th>\n",
       "      <th>example</th>\n",
       "      <th>sorted_index</th>\n",
       "      <th>model_avg_acc</th>\n",
       "      <th>difficulty</th>\n",
       "      <th>difficulty_std</th>\n",
       "      <th>discrimination</th>\n",
       "      <th>guessing</th>\n",
       "      <th>feasibility</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>VASoL_2007_5_22</td>\n",
       "      <td>22 Sandy is conducting an investigation to fin...</td>\n",
       "      <td>{'label': ['A', 'B', 'C', 'D'], 'text': ['The ...</td>\n",
       "      <td>B</td>\n",
       "      <td>Question: 22 Sandy is conducting an investigat...</td>\n",
       "      <td>0</td>\n",
       "      <td>0.729730</td>\n",
       "      <td>0.578169</td>\n",
       "      <td>1.614857</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.695026</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Mercury_414352</td>\n",
       "      <td>A 0.20 kg softball travels 97 meters (m) south...</td>\n",
       "      <td>{'label': ['A', 'B', 'C', 'D'], 'text': ['The ...</td>\n",
       "      <td>A</td>\n",
       "      <td>Question: A 0.20 kg softball travels 97 meters...</td>\n",
       "      <td>1</td>\n",
       "      <td>0.486486</td>\n",
       "      <td>-1.984390</td>\n",
       "      <td>1.071476</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.372527</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>MCAS_2005_9_6</td>\n",
       "      <td>A 1500 kg car increases its speed by 2 m/s for...</td>\n",
       "      <td>{'label': ['A', 'B', 'C', 'D'], 'text': ['750 ...</td>\n",
       "      <td>C</td>\n",
       "      <td>Question: A 1500 kg car increases its speed by...</td>\n",
       "      <td>2</td>\n",
       "      <td>0.513514</td>\n",
       "      <td>1.259977</td>\n",
       "      <td>1.622572</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.489800</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Mercury_403234</td>\n",
       "      <td>A 20 N object is placed on a surface and start...</td>\n",
       "      <td>{'label': ['A', 'B', 'C', 'D'], 'text': ['Grav...</td>\n",
       "      <td>B</td>\n",
       "      <td>Question: A 20 N object is placed on a surface...</td>\n",
       "      <td>3</td>\n",
       "      <td>0.243243</td>\n",
       "      <td>4.421014</td>\n",
       "      <td>1.714306</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.243239</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Mercury_402207</td>\n",
       "      <td>A 60-kg man and a 25-kg boy, both on roller sk...</td>\n",
       "      <td>{'label': ['A', 'B', 'C', 'D'], 'text': ['The ...</td>\n",
       "      <td>B</td>\n",
       "      <td>Question: A 60-kg man and a 25-kg boy, both on...</td>\n",
       "      <td>4</td>\n",
       "      <td>0.324324</td>\n",
       "      <td>-1.747676</td>\n",
       "      <td>1.009565</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.144643</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1167</th>\n",
       "      <td>Mercury_7005005</td>\n",
       "      <td>Winds blowing inland from oceans tend to have ...</td>\n",
       "      <td>{'label': ['A', 'B', 'C', 'D'], 'text': ['Ther...</td>\n",
       "      <td>C</td>\n",
       "      <td>Question: Winds blowing inland from oceans ten...</td>\n",
       "      <td>1167</td>\n",
       "      <td>0.324324</td>\n",
       "      <td>-2.423549</td>\n",
       "      <td>0.637539</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.081576</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1168</th>\n",
       "      <td>Mercury_7198468</td>\n",
       "      <td>Wolves, which are top predators, were eliminat...</td>\n",
       "      <td>{'label': ['A', 'B', 'C', 'D'], 'text': ['an i...</td>\n",
       "      <td>A</td>\n",
       "      <td>Question: Wolves, which are top predators, wer...</td>\n",
       "      <td>1168</td>\n",
       "      <td>0.270270</td>\n",
       "      <td>3.575032</td>\n",
       "      <td>1.711608</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.268062</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1169</th>\n",
       "      <td>TIMSS_1995_8_N4</td>\n",
       "      <td>Years ago farmers found that corn plants grew ...</td>\n",
       "      <td>{'label': ['A', 'B', 'C', 'D', 'E'], 'text': [...</td>\n",
       "      <td>B</td>\n",
       "      <td>Question: Years ago farmers found that corn pl...</td>\n",
       "      <td>1169</td>\n",
       "      <td>0.486486</td>\n",
       "      <td>1.026934</td>\n",
       "      <td>1.415864</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.436878</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1170</th>\n",
       "      <td>LEAP__4_10225</td>\n",
       "      <td>You are getting up to go to school in Louisian...</td>\n",
       "      <td>{'label': ['A', 'B', 'C', 'D'], 'text': ['Eart...</td>\n",
       "      <td>B</td>\n",
       "      <td>Question: You are getting up to go to school i...</td>\n",
       "      <td>1170</td>\n",
       "      <td>0.270270</td>\n",
       "      <td>-2.070266</td>\n",
       "      <td>0.633091</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.050889</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1171</th>\n",
       "      <td>Mercury_SC_416177</td>\n",
       "      <td>Zinc is a mineral that helps</td>\n",
       "      <td>{'label': ['A', 'B', 'C', 'D'], 'text': ['keep...</td>\n",
       "      <td>C</td>\n",
       "      <td>Question: Zinc is a mineral that helps\\nAnswer:</td>\n",
       "      <td>1171</td>\n",
       "      <td>0.243243</td>\n",
       "      <td>0.269726</td>\n",
       "      <td>1.142296</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.169766</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1172 rows × 12 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                     id                                           question  \\\n",
       "0       VASoL_2007_5_22  22 Sandy is conducting an investigation to fin...   \n",
       "1        Mercury_414352  A 0.20 kg softball travels 97 meters (m) south...   \n",
       "2         MCAS_2005_9_6  A 1500 kg car increases its speed by 2 m/s for...   \n",
       "3        Mercury_403234  A 20 N object is placed on a surface and start...   \n",
       "4        Mercury_402207  A 60-kg man and a 25-kg boy, both on roller sk...   \n",
       "...                 ...                                                ...   \n",
       "1167    Mercury_7005005  Winds blowing inland from oceans tend to have ...   \n",
       "1168    Mercury_7198468  Wolves, which are top predators, were eliminat...   \n",
       "1169    TIMSS_1995_8_N4  Years ago farmers found that corn plants grew ...   \n",
       "1170      LEAP__4_10225  You are getting up to go to school in Louisian...   \n",
       "1171  Mercury_SC_416177                       Zinc is a mineral that helps   \n",
       "\n",
       "                                                choices answerKey  \\\n",
       "0     {'label': ['A', 'B', 'C', 'D'], 'text': ['The ...         B   \n",
       "1     {'label': ['A', 'B', 'C', 'D'], 'text': ['The ...         A   \n",
       "2     {'label': ['A', 'B', 'C', 'D'], 'text': ['750 ...         C   \n",
       "3     {'label': ['A', 'B', 'C', 'D'], 'text': ['Grav...         B   \n",
       "4     {'label': ['A', 'B', 'C', 'D'], 'text': ['The ...         B   \n",
       "...                                                 ...       ...   \n",
       "1167  {'label': ['A', 'B', 'C', 'D'], 'text': ['Ther...         C   \n",
       "1168  {'label': ['A', 'B', 'C', 'D'], 'text': ['an i...         A   \n",
       "1169  {'label': ['A', 'B', 'C', 'D', 'E'], 'text': [...         B   \n",
       "1170  {'label': ['A', 'B', 'C', 'D'], 'text': ['Eart...         B   \n",
       "1171  {'label': ['A', 'B', 'C', 'D'], 'text': ['keep...         C   \n",
       "\n",
       "                                                example  sorted_index  \\\n",
       "0     Question: 22 Sandy is conducting an investigat...             0   \n",
       "1     Question: A 0.20 kg softball travels 97 meters...             1   \n",
       "2     Question: A 1500 kg car increases its speed by...             2   \n",
       "3     Question: A 20 N object is placed on a surface...             3   \n",
       "4     Question: A 60-kg man and a 25-kg boy, both on...             4   \n",
       "...                                                 ...           ...   \n",
       "1167  Question: Winds blowing inland from oceans ten...          1167   \n",
       "1168  Question: Wolves, which are top predators, wer...          1168   \n",
       "1169  Question: Years ago farmers found that corn pl...          1169   \n",
       "1170  Question: You are getting up to go to school i...          1170   \n",
       "1171    Question: Zinc is a mineral that helps\\nAnswer:          1171   \n",
       "\n",
       "      model_avg_acc  difficulty  difficulty_std  discrimination  guessing  \\\n",
       "0          0.729730    0.578169        1.614857             NaN  0.695026   \n",
       "1          0.486486   -1.984390        1.071476             NaN  0.372527   \n",
       "2          0.513514    1.259977        1.622572             NaN  0.489800   \n",
       "3          0.243243    4.421014        1.714306             NaN  0.243239   \n",
       "4          0.324324   -1.747676        1.009565             NaN  0.144643   \n",
       "...             ...         ...             ...             ...       ...   \n",
       "1167       0.324324   -2.423549        0.637539             NaN  0.081576   \n",
       "1168       0.270270    3.575032        1.711608             NaN  0.268062   \n",
       "1169       0.486486    1.026934        1.415864             NaN  0.436878   \n",
       "1170       0.270270   -2.070266        0.633091             NaN  0.050889   \n",
       "1171       0.243243    0.269726        1.142296             NaN  0.169766   \n",
       "\n",
       "      feasibility  \n",
       "0             NaN  \n",
       "1             NaN  \n",
       "2             NaN  \n",
       "3             NaN  \n",
       "4             NaN  \n",
       "...           ...  \n",
       "1167          NaN  \n",
       "1168          NaN  \n",
       "1169          NaN  \n",
       "1170          NaN  \n",
       "1171          NaN  \n",
       "\n",
       "[1172 rows x 12 columns]"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from datasets import load_dataset\n",
    "\n",
    "\n",
    "# Fetch the \"v5_1\" configuration of Easy2Hard-ARC (downloads are cached in\n",
    "# ./cache), then convert one named split to a pandas DataFrame.\n",
    "arc_splits = load_dataset(\n",
    "    \"mcding-org/Easy2Hard-ARC\",\n",
    "    \"v5_1\",\n",
    "    cache_dir=\"./cache\",\n",
    ")\n",
    "df = arc_splits[\"model_1gPL_lr_0.1_epochs_1600_sha_9\"].to_pandas()\n",
    "\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>rating</th>\n",
       "      <th>rating_std</th>\n",
       "      <th>rating_quantile</th>\n",
       "      <th>id</th>\n",
       "      <th>question</th>\n",
       "      <th>choices</th>\n",
       "      <th>answerKey</th>\n",
       "      <th>example</th>\n",
       "      <th>model_avg_acc</th>\n",
       "      <th>unnorm_rating</th>\n",
       "      <th>unnorm_rating_std</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.633545</td>\n",
       "      <td>0.147859</td>\n",
       "      <td>0.569113</td>\n",
       "      <td>VASoL_2007_5_22</td>\n",
       "      <td>22 Sandy is conducting an investigation to fin...</td>\n",
       "      <td>{'label': ['A', 'B', 'C', 'D'], 'text': ['The ...</td>\n",
       "      <td>B</td>\n",
       "      <td>Question: 22 Sandy is conducting an investigat...</td>\n",
       "      <td>0.729730</td>\n",
       "      <td>0.578169</td>\n",
       "      <td>1.614857</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0.398912</td>\n",
       "      <td>0.098106</td>\n",
       "      <td>0.255973</td>\n",
       "      <td>Mercury_414352</td>\n",
       "      <td>A 0.20 kg softball travels 97 meters (m) south...</td>\n",
       "      <td>{'label': ['A', 'B', 'C', 'D'], 'text': ['The ...</td>\n",
       "      <td>A</td>\n",
       "      <td>Question: A 0.20 kg softball travels 97 meters...</td>\n",
       "      <td>0.486486</td>\n",
       "      <td>-1.984390</td>\n",
       "      <td>1.071476</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0.695972</td>\n",
       "      <td>0.148566</td>\n",
       "      <td>0.616894</td>\n",
       "      <td>MCAS_2005_9_6</td>\n",
       "      <td>A 1500 kg car increases its speed by 2 m/s for...</td>\n",
       "      <td>{'label': ['A', 'B', 'C', 'D'], 'text': ['750 ...</td>\n",
       "      <td>C</td>\n",
       "      <td>Question: A 1500 kg car increases its speed by...</td>\n",
       "      <td>0.513514</td>\n",
       "      <td>1.259977</td>\n",
       "      <td>1.622572</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0.985402</td>\n",
       "      <td>0.156965</td>\n",
       "      <td>0.971843</td>\n",
       "      <td>Mercury_403234</td>\n",
       "      <td>A 20 N object is placed on a surface and start...</td>\n",
       "      <td>{'label': ['A', 'B', 'C', 'D'], 'text': ['Grav...</td>\n",
       "      <td>B</td>\n",
       "      <td>Question: A 20 N object is placed on a surface...</td>\n",
       "      <td>0.243243</td>\n",
       "      <td>4.421014</td>\n",
       "      <td>1.714306</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0.420586</td>\n",
       "      <td>0.092438</td>\n",
       "      <td>0.298635</td>\n",
       "      <td>Mercury_402207</td>\n",
       "      <td>A 60-kg man and a 25-kg boy, both on roller sk...</td>\n",
       "      <td>{'label': ['A', 'B', 'C', 'D'], 'text': ['The ...</td>\n",
       "      <td>B</td>\n",
       "      <td>Question: A 60-kg man and a 25-kg boy, both on...</td>\n",
       "      <td>0.324324</td>\n",
       "      <td>-1.747676</td>\n",
       "      <td>1.009565</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1167</th>\n",
       "      <td>0.358702</td>\n",
       "      <td>0.058374</td>\n",
       "      <td>0.192833</td>\n",
       "      <td>Mercury_7005005</td>\n",
       "      <td>Winds blowing inland from oceans tend to have ...</td>\n",
       "      <td>{'label': ['A', 'B', 'C', 'D'], 'text': ['Ther...</td>\n",
       "      <td>C</td>\n",
       "      <td>Question: Winds blowing inland from oceans ten...</td>\n",
       "      <td>0.324324</td>\n",
       "      <td>-2.423549</td>\n",
       "      <td>0.637539</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1168</th>\n",
       "      <td>0.907943</td>\n",
       "      <td>0.156718</td>\n",
       "      <td>0.794369</td>\n",
       "      <td>Mercury_7198468</td>\n",
       "      <td>Wolves, which are top predators, were eliminat...</td>\n",
       "      <td>{'label': ['A', 'B', 'C', 'D'], 'text': ['an i...</td>\n",
       "      <td>A</td>\n",
       "      <td>Question: Wolves, which are top predators, wer...</td>\n",
       "      <td>0.270270</td>\n",
       "      <td>3.575032</td>\n",
       "      <td>1.711608</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1169</th>\n",
       "      <td>0.674634</td>\n",
       "      <td>0.129639</td>\n",
       "      <td>0.598123</td>\n",
       "      <td>TIMSS_1995_8_N4</td>\n",
       "      <td>Years ago farmers found that corn plants grew ...</td>\n",
       "      <td>{'label': ['A', 'B', 'C', 'D', 'E'], 'text': [...</td>\n",
       "      <td>B</td>\n",
       "      <td>Question: Years ago farmers found that corn pl...</td>\n",
       "      <td>0.486486</td>\n",
       "      <td>1.026934</td>\n",
       "      <td>1.415864</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1170</th>\n",
       "      <td>0.391049</td>\n",
       "      <td>0.057967</td>\n",
       "      <td>0.244881</td>\n",
       "      <td>LEAP__4_10225</td>\n",
       "      <td>You are getting up to go to school in Louisian...</td>\n",
       "      <td>{'label': ['A', 'B', 'C', 'D'], 'text': ['Eart...</td>\n",
       "      <td>B</td>\n",
       "      <td>Question: You are getting up to go to school i...</td>\n",
       "      <td>0.270270</td>\n",
       "      <td>-2.070266</td>\n",
       "      <td>0.633091</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1171</th>\n",
       "      <td>0.605303</td>\n",
       "      <td>0.104591</td>\n",
       "      <td>0.539249</td>\n",
       "      <td>Mercury_SC_416177</td>\n",
       "      <td>Zinc is a mineral that helps</td>\n",
       "      <td>{'label': ['A', 'B', 'C', 'D'], 'text': ['keep...</td>\n",
       "      <td>C</td>\n",
       "      <td>Question: Zinc is a mineral that helps\\nAnswer:</td>\n",
       "      <td>0.243243</td>\n",
       "      <td>0.269726</td>\n",
       "      <td>1.142296</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1172 rows × 11 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "        rating  rating_std  rating_quantile                 id  \\\n",
       "0     0.633545    0.147859         0.569113    VASoL_2007_5_22   \n",
       "1     0.398912    0.098106         0.255973     Mercury_414352   \n",
       "2     0.695972    0.148566         0.616894      MCAS_2005_9_6   \n",
       "3     0.985402    0.156965         0.971843     Mercury_403234   \n",
       "4     0.420586    0.092438         0.298635     Mercury_402207   \n",
       "...        ...         ...              ...                ...   \n",
       "1167  0.358702    0.058374         0.192833    Mercury_7005005   \n",
       "1168  0.907943    0.156718         0.794369    Mercury_7198468   \n",
       "1169  0.674634    0.129639         0.598123    TIMSS_1995_8_N4   \n",
       "1170  0.391049    0.057967         0.244881      LEAP__4_10225   \n",
       "1171  0.605303    0.104591         0.539249  Mercury_SC_416177   \n",
       "\n",
       "                                               question  \\\n",
       "0     22 Sandy is conducting an investigation to fin...   \n",
       "1     A 0.20 kg softball travels 97 meters (m) south...   \n",
       "2     A 1500 kg car increases its speed by 2 m/s for...   \n",
       "3     A 20 N object is placed on a surface and start...   \n",
       "4     A 60-kg man and a 25-kg boy, both on roller sk...   \n",
       "...                                                 ...   \n",
       "1167  Winds blowing inland from oceans tend to have ...   \n",
       "1168  Wolves, which are top predators, were eliminat...   \n",
       "1169  Years ago farmers found that corn plants grew ...   \n",
       "1170  You are getting up to go to school in Louisian...   \n",
       "1171                       Zinc is a mineral that helps   \n",
       "\n",
       "                                                choices answerKey  \\\n",
       "0     {'label': ['A', 'B', 'C', 'D'], 'text': ['The ...         B   \n",
       "1     {'label': ['A', 'B', 'C', 'D'], 'text': ['The ...         A   \n",
       "2     {'label': ['A', 'B', 'C', 'D'], 'text': ['750 ...         C   \n",
       "3     {'label': ['A', 'B', 'C', 'D'], 'text': ['Grav...         B   \n",
       "4     {'label': ['A', 'B', 'C', 'D'], 'text': ['The ...         B   \n",
       "...                                                 ...       ...   \n",
       "1167  {'label': ['A', 'B', 'C', 'D'], 'text': ['Ther...         C   \n",
       "1168  {'label': ['A', 'B', 'C', 'D'], 'text': ['an i...         A   \n",
       "1169  {'label': ['A', 'B', 'C', 'D', 'E'], 'text': [...         B   \n",
       "1170  {'label': ['A', 'B', 'C', 'D'], 'text': ['Eart...         B   \n",
       "1171  {'label': ['A', 'B', 'C', 'D'], 'text': ['keep...         C   \n",
       "\n",
       "                                                example  model_avg_acc  \\\n",
       "0     Question: 22 Sandy is conducting an investigat...       0.729730   \n",
       "1     Question: A 0.20 kg softball travels 97 meters...       0.486486   \n",
       "2     Question: A 1500 kg car increases its speed by...       0.513514   \n",
       "3     Question: A 20 N object is placed on a surface...       0.243243   \n",
       "4     Question: A 60-kg man and a 25-kg boy, both on...       0.324324   \n",
       "...                                                 ...            ...   \n",
       "1167  Question: Winds blowing inland from oceans ten...       0.324324   \n",
       "1168  Question: Wolves, which are top predators, wer...       0.270270   \n",
       "1169  Question: Years ago farmers found that corn pl...       0.486486   \n",
       "1170  Question: You are getting up to go to school i...       0.270270   \n",
       "1171    Question: Zinc is a mineral that helps\\nAnswer:       0.243243   \n",
       "\n",
       "      unnorm_rating  unnorm_rating_std  \n",
       "0          0.578169           1.614857  \n",
       "1         -1.984390           1.071476  \n",
       "2          1.259977           1.622572  \n",
       "3          4.421014           1.714306  \n",
       "4         -1.747676           1.009565  \n",
       "...             ...                ...  \n",
       "1167      -2.423549           0.637539  \n",
       "1168       3.575032           1.711608  \n",
       "1169       1.026934           1.415864  \n",
       "1170      -2.070266           0.633091  \n",
       "1171       0.269726           1.142296  \n",
       "\n",
       "[1172 rows x 11 columns]"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from datasets import Dataset, DatasetDict\n",
    "\n",
    "\n",
    "def to_release_frame(raw):\n",
    "    \"\"\"Return a new frame with min-max normalized ratings; `raw` is not modified.\n",
    "\n",
    "    Args:\n",
    "        raw: DataFrame holding `difficulty`, `difficulty_std` and the four\n",
    "            columns listed in `unused_columns` below.\n",
    "\n",
    "    Returns:\n",
    "        DataFrame ordered as: `rating`, `rating_std`, `rating_quantile`, the\n",
    "        untouched columns in their original order, then `difficulty` and\n",
    "        `difficulty_std` renamed to `unnorm_rating` and `unnorm_rating_std`.\n",
    "\n",
    "    Raises:\n",
    "        ValueError: if `difficulty` has no spread, because min-max scaling\n",
    "            would divide by zero and silently produce NaN/inf ratings.\n",
    "        KeyError: if one of the expected columns is missing.\n",
    "    \"\"\"\n",
    "    unused_columns = [\"sorted_index\", \"discrimination\", \"guessing\", \"feasibility\"]\n",
    "    rating_columns = [\"rating\", \"rating_std\", \"rating_quantile\"]\n",
    "    unnorm_names = {\n",
    "        \"difficulty\": \"unnorm_rating\",\n",
    "        \"difficulty_std\": \"unnorm_rating_std\",\n",
    "    }\n",
    "\n",
    "    lowest = raw[\"difficulty\"].min()\n",
    "    spread = raw[\"difficulty\"].max() - lowest\n",
    "    # `not spread > 0` also catches a NaN spread (e.g. an all-NaN column).\n",
    "    if not spread > 0:\n",
    "        raise ValueError(\"difficulty has zero or undefined range; cannot normalize\")\n",
    "\n",
    "    rated = raw.drop(columns=unused_columns).assign(\n",
    "        rating=(raw[\"difficulty\"] - lowest) / spread,\n",
    "        # The std is only rescaled: shifting by the minimum does not change it.\n",
    "        rating_std=raw[\"difficulty_std\"] / spread,\n",
    "        # Percentile rank of the normalized rating computed just above.\n",
    "        rating_quantile=lambda frame: frame[\"rating\"].rank(pct=True),\n",
    "    )\n",
    "\n",
    "    reserved = {*rating_columns, *unnorm_names}\n",
    "    untouched = [col for col in rated.columns if col not in reserved]\n",
    "    return rated[[*rating_columns, *untouched, *unnorm_names]].rename(\n",
    "        columns=unnorm_names\n",
    "    )\n",
    "\n",
    "\n",
    "# Rebind `df` because the following cells read the release-ready frame from it.\n",
    "df = to_release_frame(df)\n",
    "\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "c91ec28ac2aa4299bac5cbd2728f4027",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Saving the dataset (0/1 shards):   0%|          | 0/1172 [00:00<?, ? examples/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Wrap the frame (re-indexed from 0) as the single \"eval\" split and write the\n",
    "# resulting DatasetDict to ./prepub/ARC.\n",
    "arc_eval = Dataset.from_pandas(df.reset_index(drop=True))\n",
    "DatasetDict({\"eval\": arc_eval}).save_to_disk(\"./prepub/ARC\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>sentence</th>\n",
       "      <th>option1</th>\n",
       "      <th>option2</th>\n",
       "      <th>answer</th>\n",
       "      <th>example</th>\n",
       "      <th>sorted_index</th>\n",
       "      <th>model_avg_acc</th>\n",
       "      <th>difficulty</th>\n",
       "      <th>difficulty_std</th>\n",
       "      <th>discrimination</th>\n",
       "      <th>guessing</th>\n",
       "      <th>feasibility</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>A background check was performed on Natalie by...</td>\n",
       "      <td>Natalie</td>\n",
       "      <td>Lindsey</td>\n",
       "      <td>1</td>\n",
       "      <td>A background check was performed on Natalie by...</td>\n",
       "      <td>0</td>\n",
       "      <td>0.68750</td>\n",
       "      <td>-7.263207</td>\n",
       "      <td>2.576043</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.355420</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>A very active Randy injured their elbow playin...</td>\n",
       "      <td>Randy</td>\n",
       "      <td>Lawrence</td>\n",
       "      <td>2</td>\n",
       "      <td>A very active Randy injured their elbow playin...</td>\n",
       "      <td>1</td>\n",
       "      <td>0.78125</td>\n",
       "      <td>-6.121502</td>\n",
       "      <td>2.931483</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.590191</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Aaron didn't know Dennis had a peanut allergy,...</td>\n",
       "      <td>Aaron</td>\n",
       "      <td>Dennis</td>\n",
       "      <td>2</td>\n",
       "      <td>Aaron didn't know Dennis had a peanut allergy,...</td>\n",
       "      <td>2</td>\n",
       "      <td>0.43750</td>\n",
       "      <td>9.192675</td>\n",
       "      <td>1.333600</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.436682</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Aaron didn't know Dennis had a peanut allergy,...</td>\n",
       "      <td>Aaron</td>\n",
       "      <td>Dennis</td>\n",
       "      <td>1</td>\n",
       "      <td>Aaron didn't know Dennis had a peanut allergy,...</td>\n",
       "      <td>3</td>\n",
       "      <td>0.46875</td>\n",
       "      <td>-0.286533</td>\n",
       "      <td>1.015152</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.207178</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Aaron drank plenty of water before the hike, b...</td>\n",
       "      <td>Aaron</td>\n",
       "      <td>Christopher</td>\n",
       "      <td>2</td>\n",
       "      <td>Aaron drank plenty of water before the hike, b...</td>\n",
       "      <td>4</td>\n",
       "      <td>0.75000</td>\n",
       "      <td>-6.993700</td>\n",
       "      <td>3.138379</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.454976</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1262</th>\n",
       "      <td>the region was too crowded for our comfortable...</td>\n",
       "      <td>region</td>\n",
       "      <td>people</td>\n",
       "      <td>1</td>\n",
       "      <td>the region was too crowded for our comfortable...</td>\n",
       "      <td>1262</td>\n",
       "      <td>0.56250</td>\n",
       "      <td>1.441889</td>\n",
       "      <td>1.483347</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.404140</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1263</th>\n",
       "      <td>the scanning tool could not get through the ce...</td>\n",
       "      <td>cervix</td>\n",
       "      <td>tool</td>\n",
       "      <td>2</td>\n",
       "      <td>the scanning tool could not get through the ce...</td>\n",
       "      <td>1263</td>\n",
       "      <td>0.75000</td>\n",
       "      <td>-4.303753</td>\n",
       "      <td>1.678630</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.523116</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1264</th>\n",
       "      <td>the scanning tool could not get through the ce...</td>\n",
       "      <td>cervix</td>\n",
       "      <td>tool</td>\n",
       "      <td>1</td>\n",
       "      <td>the scanning tool could not get through the ce...</td>\n",
       "      <td>1264</td>\n",
       "      <td>0.59375</td>\n",
       "      <td>8.521003</td>\n",
       "      <td>1.907216</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.594892</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1265</th>\n",
       "      <td>the water poured freely into the hole until it...</td>\n",
       "      <td>hole</td>\n",
       "      <td>water</td>\n",
       "      <td>2</td>\n",
       "      <td>the water poured freely into the hole until it...</td>\n",
       "      <td>1265</td>\n",
       "      <td>0.56250</td>\n",
       "      <td>8.995516</td>\n",
       "      <td>2.578225</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.556743</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1266</th>\n",
       "      <td>the water poured freely into the hole until it...</td>\n",
       "      <td>hole</td>\n",
       "      <td>water</td>\n",
       "      <td>1</td>\n",
       "      <td>the water poured freely into the hole until it...</td>\n",
       "      <td>1266</td>\n",
       "      <td>0.84375</td>\n",
       "      <td>-2.907922</td>\n",
       "      <td>2.165578</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.711351</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1267 rows × 12 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                               sentence  option1      option2  \\\n",
       "0     A background check was performed on Natalie by...  Natalie      Lindsey   \n",
       "1     A very active Randy injured their elbow playin...    Randy     Lawrence   \n",
       "2     Aaron didn't know Dennis had a peanut allergy,...    Aaron       Dennis   \n",
       "3     Aaron didn't know Dennis had a peanut allergy,...    Aaron       Dennis   \n",
       "4     Aaron drank plenty of water before the hike, b...    Aaron  Christopher   \n",
       "...                                                 ...      ...          ...   \n",
       "1262  the region was too crowded for our comfortable...   region       people   \n",
       "1263  the scanning tool could not get through the ce...   cervix         tool   \n",
       "1264  the scanning tool could not get through the ce...   cervix         tool   \n",
       "1265  the water poured freely into the hole until it...     hole        water   \n",
       "1266  the water poured freely into the hole until it...     hole        water   \n",
       "\n",
       "     answer                                            example  sorted_index  \\\n",
       "0         1  A background check was performed on Natalie by...             0   \n",
       "1         2  A very active Randy injured their elbow playin...             1   \n",
       "2         2  Aaron didn't know Dennis had a peanut allergy,...             2   \n",
       "3         1  Aaron didn't know Dennis had a peanut allergy,...             3   \n",
       "4         2  Aaron drank plenty of water before the hike, b...             4   \n",
       "...     ...                                                ...           ...   \n",
       "1262      1  the region was too crowded for our comfortable...          1262   \n",
       "1263      2  the scanning tool could not get through the ce...          1263   \n",
       "1264      1  the scanning tool could not get through the ce...          1264   \n",
       "1265      2  the water poured freely into the hole until it...          1265   \n",
       "1266      1  the water poured freely into the hole until it...          1266   \n",
       "\n",
       "      model_avg_acc  difficulty  difficulty_std  discrimination  guessing  \\\n",
       "0           0.68750   -7.263207        2.576043             NaN  0.355420   \n",
       "1           0.78125   -6.121502        2.931483             NaN  0.590191   \n",
       "2           0.43750    9.192675        1.333600             NaN  0.436682   \n",
       "3           0.46875   -0.286533        1.015152             NaN  0.207178   \n",
       "4           0.75000   -6.993700        3.138379             NaN  0.454976   \n",
       "...             ...         ...             ...             ...       ...   \n",
       "1262        0.56250    1.441889        1.483347             NaN  0.404140   \n",
       "1263        0.75000   -4.303753        1.678630             NaN  0.523116   \n",
       "1264        0.59375    8.521003        1.907216             NaN  0.594892   \n",
       "1265        0.56250    8.995516        2.578225             NaN  0.556743   \n",
       "1266        0.84375   -2.907922        2.165578             NaN  0.711351   \n",
       "\n",
       "      feasibility  \n",
       "0             NaN  \n",
       "1             NaN  \n",
       "2             NaN  \n",
       "3             NaN  \n",
       "4             NaN  \n",
       "...           ...  \n",
       "1262          NaN  \n",
       "1263          NaN  \n",
       "1264          NaN  \n",
       "1265          NaN  \n",
       "1266          NaN  \n",
       "\n",
       "[1267 rows x 12 columns]"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Fetch the \"v5_1\" configuration of Easy2Hard-Winogrande (downloads are cached\n",
    "# in ./cache), then convert one named split to a pandas DataFrame.\n",
    "winogrande_splits = load_dataset(\n",
    "    \"mcding-org/Easy2Hard-Winogrande\",\n",
    "    \"v5_1\",\n",
    "    cache_dir=\"./cache\",\n",
    ")\n",
    "df = winogrande_splits[\"model_1gPL_lr_0.1_epochs_3200_sha_6\"].to_pandas()\n",
    "\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>rating</th>\n",
       "      <th>rating_std</th>\n",
       "      <th>rating_quantile</th>\n",
       "      <th>sentence</th>\n",
       "      <th>option1</th>\n",
       "      <th>option2</th>\n",
       "      <th>answer</th>\n",
       "      <th>example</th>\n",
       "      <th>model_avg_acc</th>\n",
       "      <th>unnorm_rating</th>\n",
       "      <th>unnorm_rating_std</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.059847</td>\n",
       "      <td>0.144677</td>\n",
       "      <td>0.033938</td>\n",
       "      <td>A background check was performed on Natalie by...</td>\n",
       "      <td>Natalie</td>\n",
       "      <td>Lindsey</td>\n",
       "      <td>1</td>\n",
       "      <td>A background check was performed on Natalie by...</td>\n",
       "      <td>0.68750</td>\n",
       "      <td>-7.263207</td>\n",
       "      <td>2.576043</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0.123969</td>\n",
       "      <td>0.164640</td>\n",
       "      <td>0.103394</td>\n",
       "      <td>A very active Randy injured their elbow playin...</td>\n",
       "      <td>Randy</td>\n",
       "      <td>Lawrence</td>\n",
       "      <td>2</td>\n",
       "      <td>A very active Randy injured their elbow playin...</td>\n",
       "      <td>0.78125</td>\n",
       "      <td>-6.121502</td>\n",
       "      <td>2.931483</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0.984052</td>\n",
       "      <td>0.074898</td>\n",
       "      <td>0.965272</td>\n",
       "      <td>Aaron didn't know Dennis had a peanut allergy,...</td>\n",
       "      <td>Aaron</td>\n",
       "      <td>Dennis</td>\n",
       "      <td>2</td>\n",
       "      <td>Aaron didn't know Dennis had a peanut allergy,...</td>\n",
       "      <td>0.43750</td>\n",
       "      <td>9.192675</td>\n",
       "      <td>1.333600</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0.451675</td>\n",
       "      <td>0.057014</td>\n",
       "      <td>0.487766</td>\n",
       "      <td>Aaron didn't know Dennis had a peanut allergy,...</td>\n",
       "      <td>Aaron</td>\n",
       "      <td>Dennis</td>\n",
       "      <td>1</td>\n",
       "      <td>Aaron didn't know Dennis had a peanut allergy,...</td>\n",
       "      <td>0.46875</td>\n",
       "      <td>-0.286533</td>\n",
       "      <td>1.015152</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0.074984</td>\n",
       "      <td>0.176259</td>\n",
       "      <td>0.055249</td>\n",
       "      <td>Aaron drank plenty of water before the hike, b...</td>\n",
       "      <td>Aaron</td>\n",
       "      <td>Christopher</td>\n",
       "      <td>2</td>\n",
       "      <td>Aaron drank plenty of water before the hike, b...</td>\n",
       "      <td>0.75000</td>\n",
       "      <td>-6.993700</td>\n",
       "      <td>3.138379</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1262</th>\n",
       "      <td>0.548748</td>\n",
       "      <td>0.083309</td>\n",
       "      <td>0.665351</td>\n",
       "      <td>the region was too crowded for our comfortable...</td>\n",
       "      <td>region</td>\n",
       "      <td>people</td>\n",
       "      <td>1</td>\n",
       "      <td>the region was too crowded for our comfortable...</td>\n",
       "      <td>0.56250</td>\n",
       "      <td>1.441889</td>\n",
       "      <td>1.483347</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1263</th>\n",
       "      <td>0.226058</td>\n",
       "      <td>0.094276</td>\n",
       "      <td>0.211523</td>\n",
       "      <td>the scanning tool could not get through the ce...</td>\n",
       "      <td>cervix</td>\n",
       "      <td>tool</td>\n",
       "      <td>2</td>\n",
       "      <td>the scanning tool could not get through the ce...</td>\n",
       "      <td>0.75000</td>\n",
       "      <td>-4.303753</td>\n",
       "      <td>1.678630</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1264</th>\n",
       "      <td>0.946329</td>\n",
       "      <td>0.107114</td>\n",
       "      <td>0.907656</td>\n",
       "      <td>the scanning tool could not get through the ce...</td>\n",
       "      <td>cervix</td>\n",
       "      <td>tool</td>\n",
       "      <td>1</td>\n",
       "      <td>the scanning tool could not get through the ce...</td>\n",
       "      <td>0.59375</td>\n",
       "      <td>8.521003</td>\n",
       "      <td>1.907216</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1265</th>\n",
       "      <td>0.972979</td>\n",
       "      <td>0.144800</td>\n",
       "      <td>0.947119</td>\n",
       "      <td>the water poured freely into the hole until it...</td>\n",
       "      <td>hole</td>\n",
       "      <td>water</td>\n",
       "      <td>2</td>\n",
       "      <td>the water poured freely into the hole until it...</td>\n",
       "      <td>0.56250</td>\n",
       "      <td>8.995516</td>\n",
       "      <td>2.578225</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1266</th>\n",
       "      <td>0.304451</td>\n",
       "      <td>0.121624</td>\n",
       "      <td>0.334649</td>\n",
       "      <td>the water poured freely into the hole until it...</td>\n",
       "      <td>hole</td>\n",
       "      <td>water</td>\n",
       "      <td>1</td>\n",
       "      <td>the water poured freely into the hole until it...</td>\n",
       "      <td>0.84375</td>\n",
       "      <td>-2.907922</td>\n",
       "      <td>2.165578</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1267 rows × 11 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "        rating  rating_std  rating_quantile  \\\n",
       "0     0.059847    0.144677         0.033938   \n",
       "1     0.123969    0.164640         0.103394   \n",
       "2     0.984052    0.074898         0.965272   \n",
       "3     0.451675    0.057014         0.487766   \n",
       "4     0.074984    0.176259         0.055249   \n",
       "...        ...         ...              ...   \n",
       "1262  0.548748    0.083309         0.665351   \n",
       "1263  0.226058    0.094276         0.211523   \n",
       "1264  0.946329    0.107114         0.907656   \n",
       "1265  0.972979    0.144800         0.947119   \n",
       "1266  0.304451    0.121624         0.334649   \n",
       "\n",
       "                                               sentence  option1      option2  \\\n",
       "0     A background check was performed on Natalie by...  Natalie      Lindsey   \n",
       "1     A very active Randy injured their elbow playin...    Randy     Lawrence   \n",
       "2     Aaron didn't know Dennis had a peanut allergy,...    Aaron       Dennis   \n",
       "3     Aaron didn't know Dennis had a peanut allergy,...    Aaron       Dennis   \n",
       "4     Aaron drank plenty of water before the hike, b...    Aaron  Christopher   \n",
       "...                                                 ...      ...          ...   \n",
       "1262  the region was too crowded for our comfortable...   region       people   \n",
       "1263  the scanning tool could not get through the ce...   cervix         tool   \n",
       "1264  the scanning tool could not get through the ce...   cervix         tool   \n",
       "1265  the water poured freely into the hole until it...     hole        water   \n",
       "1266  the water poured freely into the hole until it...     hole        water   \n",
       "\n",
       "     answer                                            example  model_avg_acc  \\\n",
       "0         1  A background check was performed on Natalie by...        0.68750   \n",
       "1         2  A very active Randy injured their elbow playin...        0.78125   \n",
       "2         2  Aaron didn't know Dennis had a peanut allergy,...        0.43750   \n",
       "3         1  Aaron didn't know Dennis had a peanut allergy,...        0.46875   \n",
       "4         2  Aaron drank plenty of water before the hike, b...        0.75000   \n",
       "...     ...                                                ...            ...   \n",
       "1262      1  the region was too crowded for our comfortable...        0.56250   \n",
       "1263      2  the scanning tool could not get through the ce...        0.75000   \n",
       "1264      1  the scanning tool could not get through the ce...        0.59375   \n",
       "1265      2  the water poured freely into the hole until it...        0.56250   \n",
       "1266      1  the water poured freely into the hole until it...        0.84375   \n",
       "\n",
       "      unnorm_rating  unnorm_rating_std  \n",
       "0         -7.263207           2.576043  \n",
       "1         -6.121502           2.931483  \n",
       "2          9.192675           1.333600  \n",
       "3         -0.286533           1.015152  \n",
       "4         -6.993700           3.138379  \n",
       "...             ...                ...  \n",
       "1262       1.441889           1.483347  \n",
       "1263      -4.303753           1.678630  \n",
       "1264       8.521003           1.907216  \n",
       "1265       8.995516           2.578225  \n",
       "1266      -2.907922           2.165578  \n",
       "\n",
       "[1267 rows x 11 columns]"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from datasets import Dataset, DatasetDict\n",
    "\n",
    "# Normalize rating\n",
    "df[\"rating\"] = (df[\"difficulty\"] - df[\"difficulty\"].min()) / (\n",
    "    df[\"difficulty\"].max() - df[\"difficulty\"].min()\n",
    ")\n",
    "df[\"rating_std\"] = df[\"difficulty_std\"] / (\n",
    "    df[\"difficulty\"].max() - df[\"difficulty\"].min()\n",
    ")\n",
    "df[\"rating_quantile\"] = df[\"rating\"].rank(pct=True)\n",
    "\n",
    "df.drop(\n",
    "    columns=[\"sorted_index\", \"discrimination\", \"guessing\", \"feasibility\"], inplace=True\n",
    ")\n",
    "rename_dict = (\n",
    "    {\n",
    "        \"rating\": \"rating\",\n",
    "        \"rating_std\": \"rating_std\",\n",
    "        \"rating_quantile\": \"rating_quantile\",\n",
    "    }\n",
    "    | {c: c for c in df.columns if c not in [\"difficulty\", \"difficulty_std\"]}\n",
    "    | {\n",
    "        \"difficulty\": \"unnorm_rating\",\n",
    "        \"difficulty_std\": \"unnorm_rating_std\",\n",
    "    }\n",
    ")\n",
    "df = df[rename_dict.keys()]\n",
    "df.rename(\n",
    "    columns=rename_dict,\n",
    "    inplace=True,\n",
    ")\n",
    "\n",
    "\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "90446f17497042b3b6b9c0ab01ea250a",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Saving the dataset (0/1 shards):   0%|          | 0/1267 [00:00<?, ? examples/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "DatasetDict({\"eval\": Dataset.from_pandas(df.reset_index(drop=True))}).save_to_disk(\n",
    "    \"./prepub/Winogrande\"\n",
    ")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
