{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "1aef0cd2-cd69-4d85-a1bb-feb842b5e78c",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import ast"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "ef931d02-194e-4c3d-b014-2469c2b545be",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Function to convert the string to a list (if it's a string representation of a list)\n",
    "def safe_eval(value):\n",
    "    try:\n",
    "        return ast.literal_eval(value)\n",
    "    except (ValueError, SyntaxError):\n",
    "        return value  # If it's not a list-like string, just return the value"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "a5dc2f4c-0b2d-4687-bac2-da0b758e28f8",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Function to unwrap single-element lists\n",
    "def unwrap_list(x):\n",
    "    if isinstance(x, list) and len(x) == 1:\n",
    "        return x[0]\n",
    "    return str(x)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "3c30bad2-5343-4a7d-9446-f09ae63bff6e",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 1. List your CSV paths\n",
    "csv_paths = [\n",
    "    \"internvl_labels.csv\",\n",
    "    \"ovis_labels.csv\",\n",
    "    \"qwen_labels.csv\",\n",
    "    \"sailvl_labels.csv\",\n",
    "    \"ola_labels.csv\",\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "76fbd238-5202-4a49-8c67-214add9f65f1",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 2. Read and rename columns so that merges don’t collide:\n",
    "dfs = []\n",
    "for i, path in enumerate(csv_paths, start=1):\n",
    "    df = pd.read_csv(path)\n",
    "    # prefix each attribute with annotator index\n",
    "    df = df.add_prefix(f\"A{i}_\")\n",
    "    # but keep the filename column consistent:\n",
    "    df = df.rename(columns={f\"A{i}_File Name\": \"File Name\"})\n",
    "    df = df.applymap(safe_eval).applymap(unwrap_list)\n",
    "    dfs.append(df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "c26b009e-a88f-4ea8-97ae-1ca967ef6b40",
   "metadata": {},
   "outputs": [],
   "source": [
    "from functools import reduce\n",
    "# 3. Merge them all on File Name\n",
    "df_merged = reduce(lambda left, right: pd.merge(left, right, on=\"File Name\"), dfs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "a2fd3062-a02a-454b-88a4-40c88db443b8",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>File Name</th>\n",
       "      <th>A1_Gender</th>\n",
       "      <th>A1_Age</th>\n",
       "      <th>A1_Skin Color</th>\n",
       "      <th>A1_Ancestry</th>\n",
       "      <th>A1_Hair Color</th>\n",
       "      <th>A1_Bangs</th>\n",
       "      <th>A1_Bald</th>\n",
       "      <th>A1_Beard</th>\n",
       "      <th>A1_Glasses</th>\n",
       "      <th>...</th>\n",
       "      <th>A5_Gender</th>\n",
       "      <th>A5_Age</th>\n",
       "      <th>A5_Skin Color</th>\n",
       "      <th>A5_Ancestry</th>\n",
       "      <th>A5_Hair Color</th>\n",
       "      <th>A5_Bangs</th>\n",
       "      <th>A5_Bald</th>\n",
       "      <th>A5_Beard</th>\n",
       "      <th>A5_Glasses</th>\n",
       "      <th>A5_Headwear</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>African/m.03c31xs/m.03c31xs_0001.jpg</td>\n",
       "      <td>male</td>\n",
       "      <td>young</td>\n",
       "      <td>dark</td>\n",
       "      <td>black</td>\n",
       "      <td>blonde</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>full</td>\n",
       "      <td>no</td>\n",
       "      <td>...</td>\n",
       "      <td>male</td>\n",
       "      <td>young</td>\n",
       "      <td>dark</td>\n",
       "      <td>black</td>\n",
       "      <td>blonde</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>mustache</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>African/m.03c31xs/m.03c31xs_0004.jpg</td>\n",
       "      <td>male</td>\n",
       "      <td>young</td>\n",
       "      <td>dark</td>\n",
       "      <td>black</td>\n",
       "      <td>black</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>full</td>\n",
       "      <td>no</td>\n",
       "      <td>...</td>\n",
       "      <td>male</td>\n",
       "      <td>young</td>\n",
       "      <td>dark</td>\n",
       "      <td>black</td>\n",
       "      <td>black</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>yes</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>African/m.03c31xs/m.03c31xs_0003.jpg</td>\n",
       "      <td>male</td>\n",
       "      <td>young</td>\n",
       "      <td>dark</td>\n",
       "      <td>black</td>\n",
       "      <td>black</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>stubble</td>\n",
       "      <td>no</td>\n",
       "      <td>...</td>\n",
       "      <td>male</td>\n",
       "      <td>young</td>\n",
       "      <td>dark</td>\n",
       "      <td>black</td>\n",
       "      <td>black</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>African/m.03c31xs/m.03c31xs_0002.jpg</td>\n",
       "      <td>male</td>\n",
       "      <td>young</td>\n",
       "      <td>dark</td>\n",
       "      <td>black</td>\n",
       "      <td>black</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>full</td>\n",
       "      <td>no</td>\n",
       "      <td>...</td>\n",
       "      <td>male</td>\n",
       "      <td>young</td>\n",
       "      <td>dark</td>\n",
       "      <td>black</td>\n",
       "      <td>black</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>yes</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>African/m.01ng51t/m.01ng51t_0002.jpg</td>\n",
       "      <td>male</td>\n",
       "      <td>senior</td>\n",
       "      <td>dark</td>\n",
       "      <td>black</td>\n",
       "      <td>unknown</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>mustache</td>\n",
       "      <td>no</td>\n",
       "      <td>...</td>\n",
       "      <td>male</td>\n",
       "      <td>middle-aged</td>\n",
       "      <td>dark</td>\n",
       "      <td>black</td>\n",
       "      <td>black</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>mustache</td>\n",
       "      <td>no</td>\n",
       "      <td>hat</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>40602</th>\n",
       "      <td>Indian/m.0fgsc6/m.0fgsc6_0002.jpg</td>\n",
       "      <td>male</td>\n",
       "      <td>middle-aged</td>\n",
       "      <td>medium</td>\n",
       "      <td>south_asian</td>\n",
       "      <td>black</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>...</td>\n",
       "      <td>male</td>\n",
       "      <td>middle-aged</td>\n",
       "      <td>medium</td>\n",
       "      <td>south_asian</td>\n",
       "      <td>black</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>40603</th>\n",
       "      <td>Indian/m.0h06p_/m.0h06p__0001.jpg</td>\n",
       "      <td>male</td>\n",
       "      <td>senior</td>\n",
       "      <td>medium</td>\n",
       "      <td>south_asian</td>\n",
       "      <td>gray</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>...</td>\n",
       "      <td>male</td>\n",
       "      <td>senior</td>\n",
       "      <td>light</td>\n",
       "      <td>south_asian</td>\n",
       "      <td>gray</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>40604</th>\n",
       "      <td>Indian/m.0h06p_/m.0h06p__0003.jpg</td>\n",
       "      <td>male</td>\n",
       "      <td>senior</td>\n",
       "      <td>medium</td>\n",
       "      <td>south_asian</td>\n",
       "      <td>gray</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>...</td>\n",
       "      <td>male</td>\n",
       "      <td>senior</td>\n",
       "      <td>light</td>\n",
       "      <td>south_asian</td>\n",
       "      <td>gray</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>40605</th>\n",
       "      <td>Indian/m.0h06p_/m.0h06p__0002.jpg</td>\n",
       "      <td>male</td>\n",
       "      <td>senior</td>\n",
       "      <td>medium</td>\n",
       "      <td>south_asian</td>\n",
       "      <td>gray</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>...</td>\n",
       "      <td>male</td>\n",
       "      <td>senior</td>\n",
       "      <td>light</td>\n",
       "      <td>south_asian</td>\n",
       "      <td>gray</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>40606</th>\n",
       "      <td>Indian/m.0h06p_/m.0h06p__0004.jpg</td>\n",
       "      <td>male</td>\n",
       "      <td>senior</td>\n",
       "      <td>medium</td>\n",
       "      <td>south_asian</td>\n",
       "      <td>gray</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>...</td>\n",
       "      <td>male</td>\n",
       "      <td>middle-aged</td>\n",
       "      <td>light</td>\n",
       "      <td>south_asian</td>\n",
       "      <td>black</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>40607 rows × 51 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                  File Name A1_Gender       A1_Age  \\\n",
       "0      African/m.03c31xs/m.03c31xs_0001.jpg      male        young   \n",
       "1      African/m.03c31xs/m.03c31xs_0004.jpg      male        young   \n",
       "2      African/m.03c31xs/m.03c31xs_0003.jpg      male        young   \n",
       "3      African/m.03c31xs/m.03c31xs_0002.jpg      male        young   \n",
       "4      African/m.01ng51t/m.01ng51t_0002.jpg      male       senior   \n",
       "...                                     ...       ...          ...   \n",
       "40602     Indian/m.0fgsc6/m.0fgsc6_0002.jpg      male  middle-aged   \n",
       "40603     Indian/m.0h06p_/m.0h06p__0001.jpg      male       senior   \n",
       "40604     Indian/m.0h06p_/m.0h06p__0003.jpg      male       senior   \n",
       "40605     Indian/m.0h06p_/m.0h06p__0002.jpg      male       senior   \n",
       "40606     Indian/m.0h06p_/m.0h06p__0004.jpg      male       senior   \n",
       "\n",
       "      A1_Skin Color  A1_Ancestry A1_Hair Color A1_Bangs A1_Bald  A1_Beard  \\\n",
       "0              dark        black        blonde       no      no      full   \n",
       "1              dark        black         black       no      no      full   \n",
       "2              dark        black         black       no      no   stubble   \n",
       "3              dark        black         black       no      no      full   \n",
       "4              dark        black       unknown       no      no  mustache   \n",
       "...             ...          ...           ...      ...     ...       ...   \n",
       "40602        medium  south_asian         black       no      no        no   \n",
       "40603        medium  south_asian          gray       no      no        no   \n",
       "40604        medium  south_asian          gray       no      no        no   \n",
       "40605        medium  south_asian          gray       no      no        no   \n",
       "40606        medium  south_asian          gray       no      no        no   \n",
       "\n",
       "      A1_Glasses  ... A5_Gender       A5_Age A5_Skin Color  A5_Ancestry  \\\n",
       "0             no  ...      male        young          dark        black   \n",
       "1             no  ...      male        young          dark        black   \n",
       "2             no  ...      male        young          dark        black   \n",
       "3             no  ...      male        young          dark        black   \n",
       "4             no  ...      male  middle-aged          dark        black   \n",
       "...          ...  ...       ...          ...           ...          ...   \n",
       "40602         no  ...      male  middle-aged        medium  south_asian   \n",
       "40603         no  ...      male       senior         light  south_asian   \n",
       "40604         no  ...      male       senior         light  south_asian   \n",
       "40605         no  ...      male       senior         light  south_asian   \n",
       "40606         no  ...      male  middle-aged         light  south_asian   \n",
       "\n",
       "      A5_Hair Color A5_Bangs A5_Bald  A5_Beard A5_Glasses A5_Headwear  \n",
       "0            blonde       no      no  mustache         no          no  \n",
       "1             black       no      no       yes         no          no  \n",
       "2             black       no      no        no         no          no  \n",
       "3             black       no      no       yes         no          no  \n",
       "4             black       no      no  mustache         no         hat  \n",
       "...             ...      ...     ...       ...        ...         ...  \n",
       "40602         black       no      no        no         no          no  \n",
       "40603          gray       no      no        no         no          no  \n",
       "40604          gray       no      no        no         no          no  \n",
       "40605          gray       no      no        no         no          no  \n",
       "40606         black       no      no        no         no          no  \n",
       "\n",
       "[40607 rows x 51 columns]"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_merged"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "57af76f2-1f35-4580-a9db-e2e9a846937e",
   "metadata": {},
   "source": [
    "# Majority Voting"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "id": "bcb41a60-6620-45b2-ae1a-a5ee591c4773",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 4. Define your attributes and a helper to pick a majority label\n",
    "attributes = [\n",
    "    \"Gender\",\"Age\",\"Skin Color\",\"Ancestry\",\"Hair Color\",\n",
    "    \"Bangs\",\"Bald\",\"Beard\",\"Glasses\",\"Headwear\"\n",
    "]\n",
    "\n",
    "def majority_vote_excluding_unknown(row, cols, unknown_value=\"unknown\"):\n",
    "    values = row[cols]\n",
    "\n",
    "    # Filter out 'unknown' values\n",
    "    valid_values = values[values != unknown_value]\n",
    "    n_valid = len(valid_values)\n",
    "\n",
    "    if n_valid == 0:\n",
    "        return pd.NA  # Only 'unknown' values or all missing\n",
    "\n",
    "    # Compute majority threshold (strict majority)\n",
    "    majority_threshold = (n_valid // 2) + 1\n",
    "\n",
    "    # Count occurrences\n",
    "    counts = valid_values.value_counts()\n",
    "    top = counts[counts >= majority_threshold]\n",
    "\n",
    "    if len(top) == 1:\n",
    "        return top.index[0]\n",
    "    \n",
    "    return 'unknown'  # No clear majority\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 62,
   "id": "43d4d3aa-f303-4ea0-b2b7-a804e5f15b1b",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 5. Build the consensus DataFrame\n",
    "consensus = pd.DataFrame({\"File Name\": df_merged[\"File Name\"]})\n",
    "for attr in attributes:\n",
    "    cols = [f\"A{i}_{attr}\" for i in range(1, len(csv_paths)+1)]\n",
    "    consensus[attr] = df_merged.apply(lambda r: majority_vote_excluding_unknown(r, cols), axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 63,
   "id": "c3dca9b9-3759-49e7-867f-0d183bf1bc62",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>File Name</th>\n",
       "      <th>Gender</th>\n",
       "      <th>Age</th>\n",
       "      <th>Skin Color</th>\n",
       "      <th>Ancestry</th>\n",
       "      <th>Hair Color</th>\n",
       "      <th>Bangs</th>\n",
       "      <th>Bald</th>\n",
       "      <th>Beard</th>\n",
       "      <th>Glasses</th>\n",
       "      <th>Headwear</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>African/m.03c31xs/m.03c31xs_0001.jpg</td>\n",
       "      <td>male</td>\n",
       "      <td>young</td>\n",
       "      <td>dark</td>\n",
       "      <td>black</td>\n",
       "      <td>blonde</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>unknown</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>African/m.03c31xs/m.03c31xs_0004.jpg</td>\n",
       "      <td>male</td>\n",
       "      <td>young</td>\n",
       "      <td>dark</td>\n",
       "      <td>black</td>\n",
       "      <td>black</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>stubble</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>African/m.03c31xs/m.03c31xs_0003.jpg</td>\n",
       "      <td>male</td>\n",
       "      <td>young</td>\n",
       "      <td>dark</td>\n",
       "      <td>black</td>\n",
       "      <td>black</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>African/m.03c31xs/m.03c31xs_0002.jpg</td>\n",
       "      <td>male</td>\n",
       "      <td>young</td>\n",
       "      <td>dark</td>\n",
       "      <td>black</td>\n",
       "      <td>black</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>unknown</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>African/m.01ng51t/m.01ng51t_0002.jpg</td>\n",
       "      <td>male</td>\n",
       "      <td>senior</td>\n",
       "      <td>dark</td>\n",
       "      <td>black</td>\n",
       "      <td>black</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>mustache</td>\n",
       "      <td>no</td>\n",
       "      <td>hat</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>40602</th>\n",
       "      <td>Indian/m.0fgsc6/m.0fgsc6_0002.jpg</td>\n",
       "      <td>male</td>\n",
       "      <td>middle-aged</td>\n",
       "      <td>medium</td>\n",
       "      <td>south_asian</td>\n",
       "      <td>black</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>40603</th>\n",
       "      <td>Indian/m.0h06p_/m.0h06p__0001.jpg</td>\n",
       "      <td>male</td>\n",
       "      <td>senior</td>\n",
       "      <td>medium</td>\n",
       "      <td>south_asian</td>\n",
       "      <td>gray</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>40604</th>\n",
       "      <td>Indian/m.0h06p_/m.0h06p__0003.jpg</td>\n",
       "      <td>male</td>\n",
       "      <td>senior</td>\n",
       "      <td>medium</td>\n",
       "      <td>south_asian</td>\n",
       "      <td>gray</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>40605</th>\n",
       "      <td>Indian/m.0h06p_/m.0h06p__0002.jpg</td>\n",
       "      <td>male</td>\n",
       "      <td>senior</td>\n",
       "      <td>medium</td>\n",
       "      <td>south_asian</td>\n",
       "      <td>gray</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>40606</th>\n",
       "      <td>Indian/m.0h06p_/m.0h06p__0004.jpg</td>\n",
       "      <td>male</td>\n",
       "      <td>middle-aged</td>\n",
       "      <td>medium</td>\n",
       "      <td>south_asian</td>\n",
       "      <td>gray</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>40607 rows × 11 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                  File Name Gender          Age Skin Color  \\\n",
       "0      African/m.03c31xs/m.03c31xs_0001.jpg   male        young       dark   \n",
       "1      African/m.03c31xs/m.03c31xs_0004.jpg   male        young       dark   \n",
       "2      African/m.03c31xs/m.03c31xs_0003.jpg   male        young       dark   \n",
       "3      African/m.03c31xs/m.03c31xs_0002.jpg   male        young       dark   \n",
       "4      African/m.01ng51t/m.01ng51t_0002.jpg   male       senior       dark   \n",
       "...                                     ...    ...          ...        ...   \n",
       "40602     Indian/m.0fgsc6/m.0fgsc6_0002.jpg   male  middle-aged     medium   \n",
       "40603     Indian/m.0h06p_/m.0h06p__0001.jpg   male       senior     medium   \n",
       "40604     Indian/m.0h06p_/m.0h06p__0003.jpg   male       senior     medium   \n",
       "40605     Indian/m.0h06p_/m.0h06p__0002.jpg   male       senior     medium   \n",
       "40606     Indian/m.0h06p_/m.0h06p__0004.jpg   male  middle-aged     medium   \n",
       "\n",
       "          Ancestry Hair Color Bangs Bald     Beard Glasses Headwear  \n",
       "0            black     blonde    no   no   unknown      no       no  \n",
       "1            black      black    no   no   stubble      no       no  \n",
       "2            black      black    no   no        no      no       no  \n",
       "3            black      black    no   no   unknown      no       no  \n",
       "4            black      black    no   no  mustache      no      hat  \n",
       "...            ...        ...   ...  ...       ...     ...      ...  \n",
       "40602  south_asian      black    no   no        no      no       no  \n",
       "40603  south_asian       gray    no   no        no      no       no  \n",
       "40604  south_asian       gray    no   no        no      no       no  \n",
       "40605  south_asian       gray    no   no        no      no       no  \n",
       "40606  south_asian       gray    no   no        no      no       no  \n",
       "\n",
       "[40607 rows x 11 columns]"
      ]
     },
     "execution_count": 63,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "consensus"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 64,
   "id": "316f8315-6b32-486c-a964-c412ed140500",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 6. Save to CSV\n",
    "consensus.to_csv(\"labels_consensus.csv\", index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 121,
   "id": "f0f0a803-8629-404e-b2b5-6de97a52f486",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>File Name</th>\n",
       "      <th>A1_Gender</th>\n",
       "      <th>A1_Age</th>\n",
       "      <th>A1_Skin Color</th>\n",
       "      <th>A1_Ancestry</th>\n",
       "      <th>A1_Hair Color</th>\n",
       "      <th>A1_Bangs</th>\n",
       "      <th>A1_Bald</th>\n",
       "      <th>A1_Beard</th>\n",
       "      <th>A1_Glasses</th>\n",
       "      <th>...</th>\n",
       "      <th>A5_Gender</th>\n",
       "      <th>A5_Age</th>\n",
       "      <th>A5_Skin Color</th>\n",
       "      <th>A5_Ancestry</th>\n",
       "      <th>A5_Hair Color</th>\n",
       "      <th>A5_Bangs</th>\n",
       "      <th>A5_Bald</th>\n",
       "      <th>A5_Beard</th>\n",
       "      <th>A5_Glasses</th>\n",
       "      <th>A5_Headwear</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>African/m.0b76t68/m.0b76t68_0001.jpg</td>\n",
       "      <td>female</td>\n",
       "      <td>middle-aged</td>\n",
       "      <td>medium</td>\n",
       "      <td>black</td>\n",
       "      <td>unknown</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>...</td>\n",
       "      <td>female</td>\n",
       "      <td>middle-aged</td>\n",
       "      <td>light</td>\n",
       "      <td>black</td>\n",
       "      <td>black</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>hijab</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44</th>\n",
       "      <td>African/m.01mddpy/m.01mddpy_0004.jpg</td>\n",
       "      <td>male</td>\n",
       "      <td>middle-aged</td>\n",
       "      <td>medium</td>\n",
       "      <td>black</td>\n",
       "      <td>black</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>full</td>\n",
       "      <td>sun</td>\n",
       "      <td>...</td>\n",
       "      <td>male</td>\n",
       "      <td>middle-aged</td>\n",
       "      <td>dark</td>\n",
       "      <td>black</td>\n",
       "      <td>black</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>mustache</td>\n",
       "      <td>sun</td>\n",
       "      <td>beanie</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>96</th>\n",
       "      <td>African/m.05tjb8/m.05tjb8_0001.jpg</td>\n",
       "      <td>male</td>\n",
       "      <td>young</td>\n",
       "      <td>dark</td>\n",
       "      <td>black</td>\n",
       "      <td>black</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>full</td>\n",
       "      <td>no</td>\n",
       "      <td>...</td>\n",
       "      <td>male</td>\n",
       "      <td>middle-aged</td>\n",
       "      <td>dark</td>\n",
       "      <td>black</td>\n",
       "      <td>black</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>mustache</td>\n",
       "      <td>no</td>\n",
       "      <td>helmet</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>245</th>\n",
       "      <td>African/m.0dkcmr/m.0dkcmr_0002.jpg</td>\n",
       "      <td>male</td>\n",
       "      <td>senior</td>\n",
       "      <td>dark</td>\n",
       "      <td>black</td>\n",
       "      <td>gray</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>full</td>\n",
       "      <td>regular</td>\n",
       "      <td>...</td>\n",
       "      <td>male</td>\n",
       "      <td>senior</td>\n",
       "      <td>dark</td>\n",
       "      <td>black</td>\n",
       "      <td>gray</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>mustache</td>\n",
       "      <td>regular</td>\n",
       "      <td>beanie</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>284</th>\n",
       "      <td>African/m.07l8mg/m.07l8mg_0001.jpg</td>\n",
       "      <td>male</td>\n",
       "      <td>middle-aged</td>\n",
       "      <td>dark</td>\n",
       "      <td>black</td>\n",
       "      <td>unknown</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>stubble</td>\n",
       "      <td>no</td>\n",
       "      <td>...</td>\n",
       "      <td>male</td>\n",
       "      <td>middle-aged</td>\n",
       "      <td>dark</td>\n",
       "      <td>black</td>\n",
       "      <td>black</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>mustache</td>\n",
       "      <td>no</td>\n",
       "      <td>beanie</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>39910</th>\n",
       "      <td>Indian/m.08g1fr/m.08g1fr_0001.jpg</td>\n",
       "      <td>female</td>\n",
       "      <td>young</td>\n",
       "      <td>medium</td>\n",
       "      <td>south_asian</td>\n",
       "      <td>black</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>...</td>\n",
       "      <td>male</td>\n",
       "      <td>middle-aged</td>\n",
       "      <td>dark</td>\n",
       "      <td>south_asian</td>\n",
       "      <td>black</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>beanie</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>40109</th>\n",
       "      <td>Indian/m.0cc6442/m.0cc6442_0003.jpg</td>\n",
       "      <td>female</td>\n",
       "      <td>young</td>\n",
       "      <td>medium</td>\n",
       "      <td>south_asian</td>\n",
       "      <td>black</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>...</td>\n",
       "      <td>female</td>\n",
       "      <td>middle-aged</td>\n",
       "      <td>light</td>\n",
       "      <td>south_asian</td>\n",
       "      <td>black</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>hat</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>40276</th>\n",
       "      <td>Indian/m.03ct0gd/m.03ct0gd_0001.jpg</td>\n",
       "      <td>male</td>\n",
       "      <td>middle-aged</td>\n",
       "      <td>light</td>\n",
       "      <td>middle_eastern</td>\n",
       "      <td>gray</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>mustache</td>\n",
       "      <td>no</td>\n",
       "      <td>...</td>\n",
       "      <td>male</td>\n",
       "      <td>middle-aged</td>\n",
       "      <td>light</td>\n",
       "      <td>south_asian</td>\n",
       "      <td>black</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>mustache</td>\n",
       "      <td>no</td>\n",
       "      <td>cap</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>40332</th>\n",
       "      <td>Indian/m.02m_dh/m.02m_dh_0001.jpg</td>\n",
       "      <td>male</td>\n",
       "      <td>middle-aged</td>\n",
       "      <td>medium</td>\n",
       "      <td>south_asian</td>\n",
       "      <td>black</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>regular</td>\n",
       "      <td>...</td>\n",
       "      <td>male</td>\n",
       "      <td>middle-aged</td>\n",
       "      <td>light</td>\n",
       "      <td>south_asian</td>\n",
       "      <td>black</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>regular</td>\n",
       "      <td>beanie</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>40590</th>\n",
       "      <td>Indian/m.03c6hwt/m.03c6hwt_0002.jpg</td>\n",
       "      <td>male</td>\n",
       "      <td>middle-aged</td>\n",
       "      <td>medium</td>\n",
       "      <td>south_asian</td>\n",
       "      <td>black</td>\n",
       "      <td>yes</td>\n",
       "      <td>no</td>\n",
       "      <td>mustache</td>\n",
       "      <td>regular</td>\n",
       "      <td>...</td>\n",
       "      <td>male</td>\n",
       "      <td>middle-aged</td>\n",
       "      <td>medium</td>\n",
       "      <td>south_asian</td>\n",
       "      <td>black</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>mustache</td>\n",
       "      <td>regular</td>\n",
       "      <td>beanie</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>317 rows × 51 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                  File Name A1_Gender       A1_Age  \\\n",
       "13     African/m.0b76t68/m.0b76t68_0001.jpg    female  middle-aged   \n",
       "44     African/m.01mddpy/m.01mddpy_0004.jpg      male  middle-aged   \n",
       "96       African/m.05tjb8/m.05tjb8_0001.jpg      male        young   \n",
       "245      African/m.0dkcmr/m.0dkcmr_0002.jpg      male       senior   \n",
       "284      African/m.07l8mg/m.07l8mg_0001.jpg      male  middle-aged   \n",
       "...                                     ...       ...          ...   \n",
       "39910     Indian/m.08g1fr/m.08g1fr_0001.jpg    female        young   \n",
       "40109   Indian/m.0cc6442/m.0cc6442_0003.jpg    female        young   \n",
       "40276   Indian/m.03ct0gd/m.03ct0gd_0001.jpg      male  middle-aged   \n",
       "40332     Indian/m.02m_dh/m.02m_dh_0001.jpg      male  middle-aged   \n",
       "40590   Indian/m.03c6hwt/m.03c6hwt_0002.jpg      male  middle-aged   \n",
       "\n",
       "      A1_Skin Color     A1_Ancestry A1_Hair Color A1_Bangs A1_Bald  A1_Beard  \\\n",
       "13           medium           black       unknown       no      no        no   \n",
       "44           medium           black         black       no      no      full   \n",
       "96             dark           black         black       no      no      full   \n",
       "245            dark           black          gray       no      no      full   \n",
       "284            dark           black       unknown       no      no   stubble   \n",
       "...             ...             ...           ...      ...     ...       ...   \n",
       "39910        medium     south_asian         black       no      no        no   \n",
       "40109        medium     south_asian         black       no      no        no   \n",
       "40276         light  middle_eastern          gray       no      no  mustache   \n",
       "40332        medium     south_asian         black       no      no        no   \n",
       "40590        medium     south_asian         black      yes      no  mustache   \n",
       "\n",
       "      A1_Glasses  ... A5_Gender       A5_Age A5_Skin Color  A5_Ancestry  \\\n",
       "13            no  ...    female  middle-aged         light        black   \n",
       "44           sun  ...      male  middle-aged          dark        black   \n",
       "96            no  ...      male  middle-aged          dark        black   \n",
       "245      regular  ...      male       senior          dark        black   \n",
       "284           no  ...      male  middle-aged          dark        black   \n",
       "...          ...  ...       ...          ...           ...          ...   \n",
       "39910         no  ...      male  middle-aged          dark  south_asian   \n",
       "40109         no  ...    female  middle-aged         light  south_asian   \n",
       "40276         no  ...      male  middle-aged         light  south_asian   \n",
       "40332    regular  ...      male  middle-aged         light  south_asian   \n",
       "40590    regular  ...      male  middle-aged        medium  south_asian   \n",
       "\n",
       "      A5_Hair Color A5_Bangs A5_Bald  A5_Beard A5_Glasses A5_Headwear  \n",
       "13            black       no      no        no         no       hijab  \n",
       "44            black       no      no  mustache        sun      beanie  \n",
       "96            black       no      no  mustache         no      helmet  \n",
       "245            gray       no      no  mustache    regular      beanie  \n",
       "284           black       no      no  mustache         no      beanie  \n",
       "...             ...      ...     ...       ...        ...         ...  \n",
       "39910         black       no      no        no         no      beanie  \n",
       "40109         black       no      no        no         no         hat  \n",
       "40276         black       no      no  mustache         no         cap  \n",
       "40332         black       no      no        no    regular      beanie  \n",
       "40590         black       no      no  mustache    regular      beanie  \n",
       "\n",
       "[317 rows x 51 columns]"
      ]
     },
     "execution_count": 121,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_merged[df_merged[cols].apply(row_tie, axis=1)]"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c13ef684-1491-4584-8393-50cef53ada29",
   "metadata": {},
   "source": [
    "## Paper's table"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "id": "1ca2c05a-daf8-4c0c-9305-08ed31712864",
   "metadata": {
    "scrolled": true,
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Total samples: 40607\n",
      "\n",
      "=== Gender ===\n",
      "male: 30191 (74.3%)\n",
      "female: 10400 (25.6%)\n",
      "unknown: 16 (0.0%)\n",
      "\n",
      "=== Age ===\n",
      "middle-aged: 19072 (47.0%)\n",
      "young: 16144 (39.8%)\n",
      "senior: 5312 (13.1%)\n",
      "unknown: 79 (0.2%)\n",
      "\n",
      "=== Skin color ===\n",
      "medium: 15499 (38.2%)\n",
      "light: 13701 (33.7%)\n",
      "dark: 10952 (27.0%)\n",
      "unknown: 455 (1.1%)\n",
      "\n",
      "=== Ancestry ===\n",
      "black: 10396 (25.6%)\n",
      "white: 10161 (25.0%)\n",
      "asian: 9547 (23.5%)\n",
      "south_asian: 9191 (22.6%)\n",
      "unknown: 769 (1.9%)\n",
      "latino/hispanic: 379 (0.9%)\n",
      "middle_eastern: 146 (0.4%)\n",
      "indigenous: 18 (0.0%)\n",
      "\n",
      "=== Hair color ===\n",
      "black: 26114 (64.3%)\n",
      "brown: 5795 (14.3%)\n",
      "gray: 5529 (13.6%)\n",
      "blonde: 1780 (4.4%)\n",
      "unknown: 935 (2.3%)\n",
      "red: 318 (0.8%)\n",
      "other: 129 (0.3%)\n",
      "nan: 7 (0.0%)\n",
      "\n",
      "=== Bangs ===\n",
      "no: 35280 (86.9%)\n",
      "yes: 5297 (13.0%)\n",
      "unknown: 30 (0.1%)\n",
      "\n",
      "=== Bald ===\n",
      "no: 37553 (92.5%)\n",
      "yes: 3047 (7.5%)\n",
      "unknown: 7 (0.0%)\n",
      "\n",
      "=== Beard ===\n",
      "no: 30356 (74.8%)\n",
      "stubble: 3940 (9.7%)\n",
      "mustache: 3677 (9.1%)\n",
      "unknown: 1371 (3.4%)\n",
      "full: 1263 (3.1%)\n",
      "\n",
      "=== Glasses ===\n",
      "no: 34589 (85.2%)\n",
      "regular: 4914 (12.1%)\n",
      "sun: 1080 (2.7%)\n",
      "unknown: 24 (0.1%)\n",
      "\n",
      "=== Headwear ===\n",
      "no: 35543 (87.5%)\n",
      "cap: 1991 (4.9%)\n",
      "hat: 1089 (2.7%)\n",
      "headband: 513 (1.3%)\n",
      "unknown: 451 (1.1%)\n",
      "beanie: 304 (0.7%)\n",
      "turban: 262 (0.6%)\n",
      "helmet: 255 (0.6%)\n",
      "hijab: 199 (0.5%)\n",
      "\n"
     ]
    }
   ],
   "source": [
    "import pandas as pd\n",
    "\n",
    "# Load your annotated RFW CSV file\n",
    "df = pd.read_csv('labels_RFW.csv')\n",
    "\n",
    "# List of annotation columns (adjust if needed)\n",
    "attributes = [\n",
    "    \"Gender\",\"Age\",\"Skin Color\",\"Ancestry\",\"Hair Color\",\n",
    "    \"Bangs\",\"Bald\",\"Beard\",\"Glasses\",\"Headwear\"\n",
    "]\n",
    "\n",
    "# Total number of samples\n",
    "total_samples = len(df)\n",
    "print(f\"Total samples: {total_samples}\\n\")\n",
    "\n",
    "# Function to display count and percentage\n",
    "def summarize_category(df, column):\n",
    "    print(f\"=== {column.capitalize().replace('_', ' ')} ===\")\n",
    "    value_counts = df[column].value_counts(dropna=False)\n",
    "    for value, count in value_counts.items():\n",
    "        percent = (count / total_samples) * 100\n",
    "        print(f\"{value}: {count} ({percent:.1f}%)\")\n",
    "    print()\n",
    "\n",
    "# Generate summary for each annotation\n",
    "for col in attributes:\n",
    "    if col in df.columns:\n",
    "        summarize_category(df, col)\n",
    "    else:\n",
    "        print(f\"Warning: Column '{col}' not found in CSV.\")\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1fb0a7b0-228a-4925-9fbb-e49c32a90324",
   "metadata": {},
   "outputs": [],
   "source": [
    "}"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
