{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Downloading data: 100%|██████████| 470k/470k [00:00<00:00, 575kB/s]\n",
      "Generating test split: 100%|██████████| 9/9 [00:00<00:00, 341.15 examples/s]\n"
     ]
    }
   ],
   "source": [
    "from datasets import load_dataset\n",
    "\n",
    "# Load the \"2024-06\" configuration of the lmms-lab/LiveBench dataset;\n",
    "# the result is a DatasetDict that the following cells inspect.\n",
    "dataset = load_dataset(path=\"lmms-lab/LiveBench\", name=\"2024-06\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "DatasetDict({\n",
       "    test: Dataset({\n",
       "        features: ['id', 'images', 'website', 'question', 'answer', 'subtask', 'data_generator', 'checker', 'date_time', 'screen_shoter', 'screen_size', 'score', 'reason', 'scorer_name'],\n",
       "        num_rows: 9\n",
       "    })\n",
       "})"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Show the DatasetDict summary: available splits, feature names, and row counts.\n",
    "dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Preview only the first rows of the test split as a DataFrame. The `images`\n",
    "# column holds raw PNG bytes, so rendering the whole frame bloats the saved\n",
    "# notebook without adding information; `.head()` keeps the output compact.\n",
    "dataset[\"test\"].to_pandas().head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "live_bench",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
