{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/data/pufanyi/anaconda3/anacondabin/envs/live_bench/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
      "  from .autonotebook import tqdm as notebook_tqdm\n"
     ]
    }
   ],
   "source": [
    "from datasets import Dataset, load_dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Downloading readme: 100%|██████████| 1.60k/1.60k [00:00<00:00, 2.29MB/s]\n",
      "Downloading data: 100%|██████████| 48.9M/48.9M [00:04<00:00, 11.3MB/s]\n",
      "Downloading data: 100%|██████████| 87.6M/87.6M [00:04<00:00, 20.7MB/s]\n",
      "Downloading data: 100%|██████████| 103M/103M [00:05<00:00, 18.8MB/s] \n",
      "Generating test split: 100%|██████████| 943/943 [00:03<00:00, 290.89 examples/s]\n"
     ]
    }
   ],
   "source": [
    "data = load_dataset(\"lmms-lab/LiveBench\", \"2024-06\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "DatasetDict({\n",
       "    test: Dataset({\n",
       "        features: ['id', 'images', 'website', 'question', 'answer', 'criteria', 'subtask', 'data_generator', 'checker', 'date_time', 'screen_shoter', 'screen_size', 'score', 'reason', 'scorer_name'],\n",
       "        num_rows: 943\n",
       "    })\n",
       "})"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Filter: 100%|██████████| 441/441 [00:33<00:00, 13.09 examples/s]\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "Dataset({\n",
       "    features: ['id', 'images', 'website', 'question', 'answer', 'subtask', 'data_generator', 'checker', 'date_time', 'screen_shoter', 'screen_size', 'score', 'reason', 'scorer_name'],\n",
       "    num_rows: 409\n",
       "})"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "filtered_data = data[\"test\"].filter(lambda example: example[\"score\"] and example[\"score\"] > 5)\n",
    "filtered_data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>images</th>\n",
       "      <th>website</th>\n",
       "      <th>question</th>\n",
       "      <th>answer</th>\n",
       "      <th>subtask</th>\n",
       "      <th>data_generator</th>\n",
       "      <th>checker</th>\n",
       "      <th>date_time</th>\n",
       "      <th>screen_shoter</th>\n",
       "      <th>screen_size</th>\n",
       "      <th>score</th>\n",
       "      <th>reason</th>\n",
       "      <th>scorer_name</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>[{'bytes': b'\\x89PNG\\r\\n\\x1a\\n\\x00\\x00\\x00\\rIH...</td>\n",
       "      <td>{'url': 'https://www.bbc.com/'}</td>\n",
       "      <td>Look at the image provided. Which article head...</td>\n",
       "      <td>\"BBC tracks down smuggler behind Channel cross...</td>\n",
       "      <td>Deeper Implications</td>\n",
       "      <td>gpt4v</td>\n",
       "      <td>gemini</td>\n",
       "      <td>2024-06-27 14:36:42</td>\n",
       "      <td>single_screen</td>\n",
       "      <td>(1024, 1024)</td>\n",
       "      <td>10</td>\n",
       "      <td>The answer accurately identifies the relevant ...</td>\n",
       "      <td>gpt4v</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>[{'bytes': b'\\x89PNG\\r\\n\\x1a\\n\\x00\\x00\\x00\\rIH...</td>\n",
       "      <td>{'url': 'https://www.bbc.com/'}</td>\n",
       "      <td>Look at the image. What significant global eve...</td>\n",
       "      <td>The image of a young girl wearing a life jacke...</td>\n",
       "      <td>Contextual Analysis</td>\n",
       "      <td>gpt4v</td>\n",
       "      <td>gemini</td>\n",
       "      <td>2024-06-27 14:36:42</td>\n",
       "      <td>single_screen</td>\n",
       "      <td>(1024, 1024)</td>\n",
       "      <td>10</td>\n",
       "      <td>The answer correctly identifies Biden and Trum...</td>\n",
       "      <td>gpt4v</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>[{'bytes': b'\\x89PNG\\r\\n\\x1a\\n\\x00\\x00\\x00\\rIH...</td>\n",
       "      <td>{'url': 'https://www.bbc.com/'}</td>\n",
       "      <td>What detail in the image connected to the \"Bol...</td>\n",
       "      <td>The \"Interpol Bolivia\" background visible in t...</td>\n",
       "      <td>Deeper Implications</td>\n",
       "      <td>gpt4v</td>\n",
       "      <td>gemini</td>\n",
       "      <td>2024-06-27 14:36:42</td>\n",
       "      <td>single_screen</td>\n",
       "      <td>(1024, 1024)</td>\n",
       "      <td>7</td>\n",
       "      <td>Authenticity (4/5): The answer is reasonable b...</td>\n",
       "      <td>gpt4v</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3</td>\n",
       "      <td>[{'bytes': b'\\x89PNG\\r\\n\\x1a\\n\\x00\\x00\\x00\\rIH...</td>\n",
       "      <td>{'url': 'https://www.bbc.com/'}</td>\n",
       "      <td>Analyze the visual composition of the article ...</td>\n",
       "      <td>The image of the young girl, potentially a mig...</td>\n",
       "      <td>Contextual Analysis</td>\n",
       "      <td>gpt4v</td>\n",
       "      <td>gemini</td>\n",
       "      <td>2024-06-27 14:36:42</td>\n",
       "      <td>single_screen</td>\n",
       "      <td>(1024, 1024)</td>\n",
       "      <td>8</td>\n",
       "      <td>The question directly relates to assessing the...</td>\n",
       "      <td>gpt4v</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4</td>\n",
       "      <td>[{'bytes': b'\\x89PNG\\r\\n\\x1a\\n\\x00\\x00\\x00\\rIH...</td>\n",
       "      <td>{'url': 'https://www.bbc.com/'}</td>\n",
       "      <td>Focusing on the article \"BBC tracks down smugg...</td>\n",
       "      <td>The image of a child, juxtaposed with an artic...</td>\n",
       "      <td>Broader Implications</td>\n",
       "      <td>gpt4v</td>\n",
       "      <td>gemini</td>\n",
       "      <td>2024-06-27 14:36:42</td>\n",
       "      <td>single_screen</td>\n",
       "      <td>(1024, 1024)</td>\n",
       "      <td>10</td>\n",
       "      <td>The answer directly correlates with the story'...</td>\n",
       "      <td>gpt4v</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   id                                             images  \\\n",
       "0   0  [{'bytes': b'\\x89PNG\\r\\n\\x1a\\n\\x00\\x00\\x00\\rIH...   \n",
       "1   1  [{'bytes': b'\\x89PNG\\r\\n\\x1a\\n\\x00\\x00\\x00\\rIH...   \n",
       "2   2  [{'bytes': b'\\x89PNG\\r\\n\\x1a\\n\\x00\\x00\\x00\\rIH...   \n",
       "3   3  [{'bytes': b'\\x89PNG\\r\\n\\x1a\\n\\x00\\x00\\x00\\rIH...   \n",
       "4   4  [{'bytes': b'\\x89PNG\\r\\n\\x1a\\n\\x00\\x00\\x00\\rIH...   \n",
       "\n",
       "                           website  \\\n",
       "0  {'url': 'https://www.bbc.com/'}   \n",
       "1  {'url': 'https://www.bbc.com/'}   \n",
       "2  {'url': 'https://www.bbc.com/'}   \n",
       "3  {'url': 'https://www.bbc.com/'}   \n",
       "4  {'url': 'https://www.bbc.com/'}   \n",
       "\n",
       "                                            question  \\\n",
       "0  Look at the image provided. Which article head...   \n",
       "1  Look at the image. What significant global eve...   \n",
       "2  What detail in the image connected to the \"Bol...   \n",
       "3  Analyze the visual composition of the article ...   \n",
       "4  Focusing on the article \"BBC tracks down smugg...   \n",
       "\n",
       "                                              answer               subtask  \\\n",
       "0  \"BBC tracks down smuggler behind Channel cross...   Deeper Implications   \n",
       "1  The image of a young girl wearing a life jacke...   Contextual Analysis   \n",
       "2  The \"Interpol Bolivia\" background visible in t...   Deeper Implications   \n",
       "3  The image of the young girl, potentially a mig...   Contextual Analysis   \n",
       "4  The image of a child, juxtaposed with an artic...  Broader Implications   \n",
       "\n",
       "  data_generator checker            date_time  screen_shoter   screen_size  \\\n",
       "0          gpt4v  gemini  2024-06-27 14:36:42  single_screen  (1024, 1024)   \n",
       "1          gpt4v  gemini  2024-06-27 14:36:42  single_screen  (1024, 1024)   \n",
       "2          gpt4v  gemini  2024-06-27 14:36:42  single_screen  (1024, 1024)   \n",
       "3          gpt4v  gemini  2024-06-27 14:36:42  single_screen  (1024, 1024)   \n",
       "4          gpt4v  gemini  2024-06-27 14:36:42  single_screen  (1024, 1024)   \n",
       "\n",
       "   score                                             reason scorer_name  \n",
       "0     10  The answer accurately identifies the relevant ...       gpt4v  \n",
       "1     10  The answer correctly identifies Biden and Trum...       gpt4v  \n",
       "2      7  Authenticity (4/5): The answer is reasonable b...       gpt4v  \n",
       "3      8  The question directly relates to assessing the...       gpt4v  \n",
       "4     10  The answer directly correlates with the story'...       gpt4v  "
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "filtered_data.to_pandas().head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Map: 100%|██████████| 409/409 [00:30<00:00, 13.63 examples/s]?it/s]\n",
      "Creating parquet from Arrow format: 100%|██████████| 5/5 [00:00<00:00, 34.92ba/s]\n",
      "Uploading the dataset shards: 100%|██████████| 1/1 [00:38<00:00, 38.26s/it]\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "CommitInfo(commit_url='https://huggingface.co/datasets/lmms-lab/LiveBench/commit/7eaf5caa899cc0b8bae7156cc534e12825a97565', commit_message='Upload dataset', commit_description='', oid='7eaf5caa899cc0b8bae7156cc534e12825a97565', pr_url=None, pr_revision=None, pr_num=None)"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "filtered_data.push_to_hub(\"lmms-lab/LiveBench\", \"2024-06\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "live_bench",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
