{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "executionInfo": {
     "elapsed": 386,
     "status": "ok",
     "timestamp": 1756538252009,
     "user": {
      "displayName": "Nicla Faccioli",
      "userId": "11628056964833727666"
     },
     "user_tz": -120
    },
    "id": "sshB_Ckyk7xW"
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "from io import StringIO"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "executionInfo": {
     "elapsed": 26,
     "status": "ok",
     "timestamp": 1756538252256,
     "user": {
      "displayName": "Nicla Faccioli",
      "userId": "11628056964833727666"
     },
     "user_tz": -120
    },
    "id": "g1qoizGxm10l"
   },
   "outputs": [],
   "source": [
    "csv_path = '/content/complexGemini-G_BD_extended.csv'\n",
    "\n",
    "df = pd.read_csv(csv_path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "_ur6028slALd"
   },
   "outputs": [],
   "source": [
    "# ID ranges and categories\n",
    "categories = {\n",
    "    \"missing_elements, 1 obj\": (0, 7),\n",
    "    \"missing_elements, 2 obj\": (8, 15),\n",
    "    \"missing_elements, 3 obj\": (16, 23),\n",
    "    \"missing_elements, 4 obj\": (24, 31),\n",
    "    \"unconventional_layout, 2 obj\": (32, 47),\n",
    "    \"unconventional_layout, 4 obj\": (48, 63),\n",
    "    \"overlapping_bboxes, 1 obj\": (64, 71),\n",
    "    \"overlapping_bboxes, 2 obj\": (72, 79),\n",
    "    \"overlapping_bboxes, 3 obj\": (80, 87),\n",
    "    \"overlapping_bboxes, 4 obj\": (88, 95),\n",
    "    \"small_bboxes, 1 obj\": (96, 103),\n",
    "    \"small_bboxes, 2 obj\": (104, 111),\n",
    "    \"small_bboxes, 3 obj\": (112, 119),\n",
    "    \"small_bboxes, 4 obj\": (120, 127),\n",
    "    \"color_binding, 1 obj\": (128, 135),\n",
    "    \"color_binding, 2 obj\": (136, 143),\n",
    "    \"color_binding, 3 obj\": (144, 151),\n",
    "    \"color_binding, 4 obj\": (152, 159),\n",
    "    \"open_binding, 1 obj\": (160, 167),\n",
    "    \"open_binding, 2 obj\": (168, 175),\n",
    "    \"open_binding, 3 obj\": (176, 183),\n",
    "    \"open_binding, 4 obj\": (184, 191),\n",
    "    \"complex_scene, 1 obj\": (192, 199),\n",
    "    \"complex_scene, 2 obj\": (200, 207),\n",
    "    \"complex_scene, 3 obj\": (208, 215),\n",
    "    \"complex_scene, 4 obj\": (216, 223),\n",
    "}\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "TTzY2xVz9isN"
   },
   "outputs": [],
   "source": [
    "# ID ranges and categories for reduced prompt collection\n",
    "categories = {\n",
    "    \"missing_elements, 1 obj\": (0, 7),\n",
    "    \"missing_elements, 2 obj\": (8, 13),\n",
    "    \"missing_elements, 3 obj\": (14, 19),\n",
    "    \"missing_elements, 4 obj\": (20, 21),\n",
    "    \"unconventional_layout, 2 obj\": (22, 34),\n",
    "    \"unconventional_layout, 4 obj\": (35, 43),\n",
    "    \"overlapping_bboxes, 1 obj\": (44, 51),\n",
    "    \"overlapping_bboxes, 2 obj\": (52, 59),\n",
    "    \"overlapping_bboxes, 3 obj\": (60, 66),\n",
    "    \"overlapping_bboxes, 4 obj\": (67, 72),\n",
    "    \"small_bboxes, 1 obj\": (73, 80),\n",
    "    \"small_bboxes, 2 obj\": (81, 85),\n",
    "    \"small_bboxes, 3 obj\": (86, 92),\n",
    "    \"color_binding, 1 obj\": (93, 100),\n",
    "    \"color_binding, 2 obj\": (101, 108),\n",
    "    \"color_binding, 3 obj\": (109, 114),\n",
    "    \"color_binding, 4 obj\": (115, 117),\n",
    "    \"open_binding, 1 obj\": (118, 125),\n",
    "    \"open_binding, 2 obj\": (126, 133),\n",
    "    \"open_binding, 3 obj\": (134, 140),\n",
    "    \"open_binding, 4 obj\": (141, 146),\n",
    "    \"complex_scene, 1 obj\": (147, 154),\n",
    "    \"complex_scene, 2 obj\": (155, 162),\n",
    "    \"complex_scene, 3 obj\": (163, 170),\n",
    "    \"complex_scene, 4 obj\": (171, 177),\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "MMWoDA3vbGG3"
   },
   "outputs": [],
   "source": [
    "# ID ranges and categories for missing elements\n",
    "categories = {\n",
    "    \"missing_elements, 1 obj\": (0, 511),\n",
    "    \"missing_elements, 2 obj\": (512, 1023),\n",
    "    \"missing_elements, 3 obj\": (1024, 1535),\n",
    "    \"missing_elements, 4 obj\": (1536, 2047)\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "u31CMkzS3bnk"
   },
   "outputs": [],
   "source": [
    "# ID ranges and categories for fullNewDataset\n",
    "categories = {\n",
    "    \"object_binding, 1 obj\": (0, 127),\n",
    "    \"object_binding, 2 obj\": (128, 255),\n",
    "    \"object_binding, 3 obj\": (256, 383),\n",
    "    \"object_binding, 4 obj\": (384, 511),\n",
    "    \"color_binding, 1 obj\": (512, 639),\n",
    "    \"color_binding, 2 obj\": (640, 767),\n",
    "    \"color_binding, 3 obj\": (768, 895),\n",
    "    \"color_binding, 4 obj\": (896, 1023),\n",
    "    \"attribute_binding, 1 obj\": (1024, 1151),\n",
    "    \"attribute_binding, 2 obj\": (1152, 1279),\n",
    "    \"attribute_binding, 3 obj\": (1280, 1407),\n",
    "    \"attribute_binding, 4 obj\": (1408, 1535),\n",
    "    \"overlapping_bboxes, 1 obj\": (1536, 1663),\n",
    "    \"overlapping_bboxes, 2 obj\": (1664, 1791),\n",
    "    \"overlapping_bboxes, 3 obj\": (1792, 1919),\n",
    "    \"overlapping_bboxes, 4 obj\": (1920, 2047),\n",
    "    \"small_bboxes, 1 obj\": (2048, 2175),\n",
    "    \"small_bboxes, 2 obj\": (2176, 2303),\n",
    "    \"small_bboxes, 3 obj\": (2304, 2431),\n",
    "    \"small_bboxes, 4 obj\": (2432, 2559),\n",
    "    \"object_relationship, 2 obj\": (2560, 2687),\n",
    "    \"object_relationship, 4 obj\": (2688, 2815),\n",
    "    \"complex_composition, 1 obj\": (2816, 2943),\n",
    "    \"complex_composition, 2 obj\": (2944, 3071),\n",
    "    \"complex_composition, 3 obj\": (3072, 3199),\n",
    "    \"complex_composition, 4 obj\": (3200, 3327),\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "Bj7X1_e1EycV"
   },
   "outputs": [],
   "source": [
    "categories = {\n",
    "    \"open_set, 1 obj\": (0, 327),\n",
    "    \"open_set, 2 obj\": (328, 1389),\n",
    "    \"open_set, 3 obj\": (1390, 2447),\n",
    "    \"open_set, 4 obj\": (2448, 3008),\n",
    "    \"open_set, 5 obj\": (3009, 3196),\n",
    "    \"open_set, 6 obj\": (3197, 3274),\n",
    "    \"open_set, 7 obj\": (3275, 3301),\n",
    "    \"open_set, 8 obj\": (3302, 3318),\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "executionInfo": {
     "elapsed": 9,
     "status": "ok",
     "timestamp": 1756538259917,
     "user": {
      "displayName": "Nicla Faccioli",
      "userId": "11628056964833727666"
     },
     "user_tz": -120
    },
    "id": "yKPmda1uy6rN"
   },
   "outputs": [],
   "source": [
    "# complex prompts with different LLMs\n",
    "categories = {\n",
    "    \"complex_composition, 1 obj\": (0, 127),\n",
    "    \"complex_composition, 2 obj\": (128, 255),\n",
    "    \"complex_composition, 3 obj\": (256, 383),\n",
    "    \"complex_composition, 4 obj\": (384, 511),\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 30,
     "status": "ok",
     "timestamp": 1756538261658,
     "user": {
      "displayName": "Nicla Faccioli",
      "userId": "11628056964833727666"
     },
     "user_tz": -120
    },
    "id": "DK5V6v9Ek4Vh",
    "outputId": "a7bf88b1-d565-49db-8094-c3854fef17de"
   },
   "outputs": [],
   "source": [
    "# TIFA Score\n",
    "# Step 1: calculate per category\n",
    "print(\"\\n=== TEXT SCORES ===\")\n",
    "print(\"\\n=== Averages per Task and Number of Objects ===\")\n",
    "for category, (start_id, end_id) in categories.items():\n",
    "    mask = (df['id'] >= start_id) & (df['id'] <= end_id)\n",
    "    avg_tifa = df.loc[mask, 'tifa_score'].mean()\n",
    "    if pd.isna(avg_tifa):\n",
    "        print(f\"{category}: No data in this range.\")\n",
    "    else:\n",
    "        avg_str = f\"{avg_tifa:.4f}\".replace('.', ',')\n",
    "        print(f\"{category}: Average TIFA Score = {avg_str}\")\n",
    "\n",
    "# Step 2: group across tasks, by number of objects\n",
    "# create a mapping: how many objects -> list of IDs\n",
    "object_groups = {}\n",
    "\n",
    "for category, (start_id, end_id) in categories.items():\n",
    "    # Extract the number of objects from the category string\n",
    "    num_objects_str = category.split(',')[-1].strip()\n",
    "    if num_objects_str not in object_groups:\n",
    "        object_groups[num_objects_str] = []\n",
    "    object_groups[num_objects_str].extend(range(start_id, end_id + 1))\n",
    "\n",
    "print(\"\\n=== Averages per Number of Objects (All Tasks Together) ===\")\n",
    "# Sort the object groups by the number of objects for consistent output\n",
    "sorted_object_groups = dict(sorted(object_groups.items(), key=lambda item: int(item[0].split()[0])))\n",
    "\n",
    "for obj_count, id_list in sorted_object_groups.items():\n",
    "    mask = df['id'].isin(id_list)\n",
    "    avg_tifa = df.loc[mask, 'tifa_score'].mean()\n",
    "    if pd.isna(avg_tifa):\n",
    "        print(f\"{obj_count}: No data in this range.\")\n",
    "    else:\n",
    "        avg_str = f\"{avg_tifa:.4f}\".replace('.', ',')\n",
    "        print(f\"{obj_count}: Average TIFA Score = {avg_str}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 21,
     "status": "ok",
     "timestamp": 1756538263195,
     "user": {
      "displayName": "Nicla Faccioli",
      "userId": "11628056964833727666"
     },
     "user_tz": -120
    },
    "id": "7raZckoSoS1L",
    "outputId": "b9a84bf9-7ef8-4d7c-9960-8b60a426b0b3"
   },
   "outputs": [],
   "source": [
    "# AUC\n",
    "# Step 1: calculate per category\n",
    "print(\"\\n=== LAYOUT SCORES ===\")\n",
    "print(\"\\n=== Averages per Task and Number of Objects ===\")\n",
    "for category, (start_id, end_id) in categories.items():\n",
    "    mask = (df['id'] >= start_id) & (df['id'] <= end_id)\n",
    "    avg_auc = df.loc[mask, 'auc'].mean()\n",
    "    if pd.isna(avg_auc):\n",
    "        print(f\"{category}: No data in this range.\")\n",
    "    else:\n",
    "        avg_str = f\"{avg_auc:.4f}\".replace('.', ',')\n",
    "        print(f\"{category}: Average AUC = {avg_str}\")\n",
    "\n",
    "# Step 2: group across tasks, by number of objects\n",
    "# create a mapping: how many objects -> list of IDs\n",
    "object_groups = {}\n",
    "\n",
    "for category, (start_id, end_id) in categories.items():\n",
    "    # Extract the number of objects from the category string\n",
    "    num_objects_str = category.split(',')[-1].strip()\n",
    "    if num_objects_str not in object_groups:\n",
    "        object_groups[num_objects_str] = []\n",
    "    object_groups[num_objects_str].extend(range(start_id, end_id + 1))\n",
    "\n",
    "print(\"\\n=== Averages per Number of Objects (All Tasks Together) ===\")\n",
    "# Sort the object groups by the number of objects for consistent output\n",
    "sorted_object_groups = dict(sorted(object_groups.items(), key=lambda item: int(item[0].split()[0])))\n",
    "\n",
    "for obj_count, id_list in sorted_object_groups.items():\n",
    "    mask = df['id'].isin(id_list)\n",
    "    avg_auc = df.loc[mask, 'auc'].mean()\n",
    "    if pd.isna(avg_auc):\n",
    "        print(f\"{obj_count}: No data in this range.\")\n",
    "    else:\n",
    "        avg_str = f\"{avg_auc:.4f}\".replace('.', ',')\n",
    "        print(f\"{obj_count}: Average AUC = {avg_str}\")"
   ]
  }
 ],
 "metadata": {
  "colab": {
   "authorship_tag": "ABX9TyM7YgYysKL5VhQ+vW/g9eIq",
   "provenance": []
  },
  "kernelspec": {
   "display_name": "Python 3",
   "name": "python3"
  },
  "language_info": {
   "name": "python"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
