{
  "root": {
    "name": "Add appropriate icons or symbols inside each of the three circles in slide 2 to illustrate the respective text",
    "description": "Evaluates whether the agent has correctly placed meaningful icons or symbols inside each of the three circles in slide 2, such that each icon visually represents the text associated with its circle.",
    "is_critical": true,
    "metadata": {},
    "children": [
      {
        "name": "Icons or symbols added inside each of the three circles in slide 2",
        "description": "Checks if there are new icons or symbols inside each of the three circles in slide 2, and that the required number (3) are present.",
        "is_critical": true,
        "metadata": {},
        "scorer": {
          "type": "function",
          "function_code": "def compute_score() -> tuple[str, float]:\n    \"\"\"Score how many picture-based icons are on slide 2; full marks need at least three.\"\"\"\n    from pptx import Presentation\n    from pptx.enum.shapes import MSO_SHAPE_TYPE\n\n    expected_icons = 3\n    prs = Presentation(modified_ppt_path)\n    if len(prs.slides) < 2:\n        return \"Presentation has no slide 2 to inspect.\", 0.0\n    slide = prs.slides[1]  # slide 2 (0-based)\n\n    def iter_shapes(shapes):\n        # Descend into groups so an icon grouped with its circle is still counted.\n        for shape in shapes:\n            yield shape\n            if shape.shape_type == MSO_SHAPE_TYPE.GROUP:\n                yield from iter_shapes(shape.shapes)\n\n    # Only picture shapes (or shapes exposing an embedded image) count as icons.\n    icons_found = sum(\n        1\n        for shape in iter_shapes(slide.shapes)\n        if shape.shape_type == MSO_SHAPE_TYPE.PICTURE or hasattr(shape, 'image')\n    )\n\n    # Extra pictures must not drop the score to zero, so compare with >= rather than ==.\n    if icons_found >= expected_icons:\n        return \"Icons/symbols found inside each of the three circles.\", 1.0\n    if icons_found == 2:\n        return \"Icons/symbols found in only 2 circles.\", 2/3\n    if icons_found == 1:\n        return \"Icon/symbol found in only 1 circle.\", 1/3\n    return \"No icons/symbols found inside circles.\", 0.0\n"
        }
      },
      {
        "name": "Each icon or symbol meaningfully illustrates the respective text",
        "description": "Checks if each icon or symbol visually represents the text associated with its circle in a meaningful way.",
        "is_critical": true,
        "metadata": {},
        "scorer": {
          "type": "function",
          "function_code": "def compute_score() -> tuple[str, float]:\n    \"\"\"Ask the VLM, once per circle label, whether the icon in that circle illustrates the label.\"\"\"\n    import re\n\n    if len(modified_ppt_screenshots) < 2:\n        return \"Screenshot for slide 2 is missing.\", 0.0\n    slide = modified_ppt_screenshots[1]  # slide 2 (0-based)\n\n    texts = ['Challenging Findings', 'Demanding Methods', 'Ingredients of a Classical Study']\n    not_illustrated = []\n    for text in texts:\n        prompt = (\n            f\"Look at the circle associated with the text '{text}' on this slide. \"\n            \"Does the icon or symbol inside that circle visually represent or illustrate the text? \"\n            \"Begin your answer with 'Yes' or 'No', then briefly explain why.\"\n        )\n        resp = vlm_call(prompt, images=[slide.image_path], temperature=0.0)\n        # Judge only the leading verdict; a substring test also matches 'yes' inside the explanation (e.g. 'eyes').\n        if not re.match(r'\\W*yes\\b', resp, re.IGNORECASE):\n            not_illustrated.append(text)\n\n    score = (len(texts) - len(not_illustrated)) / len(texts)\n    reason = f\"Icon/text illustration scores: {score}\"\n    if not_illustrated:\n        reason += f\" (not illustrated: {', '.join(not_illustrated)})\"\n    return reason, score\n"
        }
      },
      {
        "name": "No unintended modifications to other slides or elements",
        "description": "Checks that no extraneous changes (icons, symbols, or other modifications) were made to slides or elements outside the specified circles in slide 2.",
        "is_critical": false,
        "metadata": {},
        "scorer": {
          "type": "function",
          "function_code": "def compute_score() -> tuple[str, float]:\n    \"\"\"Use the VLM to confirm that nothing changed except the icons added on slide 2.\"\"\"\n    # The task only adds icons to an existing slide, so no slide may be added.\n    added_slides = ppt_diff.added_slides\n    if len(added_slides) != 0:\n        return f\"Expected no slides to be added, found {len(added_slides)}.\", 0.0\n\n    # A removed slide would otherwise raise IndexError in the pairwise loop below.\n    if len(modified_ppt_screenshots) != len(original_ppt_screenshots):\n        return (f\"Slide count changed from {len(original_ppt_screenshots)} to {len(modified_ppt_screenshots)}.\", 0.0)\n\n    unchanged_prompt = (\n        \"Compare these two images of the same PowerPoint slide. There should be virtually no difference between these slides. \"\n        \"Are there *any* visible changes to the slide's content, layout, or appearance (such as text, images, shapes, or formatting)? \"\n        \"If there are no visible changes, answer NO. Otherwise, answer YES and briefly describe the differences. Take your time, analyze the slides, and compare them carefully.\"\n    )\n    icons_prompt = (\n        \"Compare these two images of the same PowerPoint slide. Besides the fact that the second slide has icons in the circles, there should be no difference between the slide images. \"\n        \"Besides the presence of the icons, are there *any* visible changes to the slide's content, layout, or appearance (such as text, images, shapes, or formatting)? \"\n        \"If there are no visible differences besides the icons, answer NO. Otherwise, answer YES and briefly describe the differences. Take your time, analyze the slides, and compare them carefully.\"\n    )\n\n    different_slides = []\n    for i, s in enumerate(original_ppt_screenshots):\n        orig_img = s.image_path\n        mod_img = modified_ppt_screenshots[i].image_path\n\n        if not orig_img or not mod_img:\n            return (f\"Screenshot for slide {i + 1} in original or modified missing.\", 0.0)\n\n        # Slide 2 (index 1) is the only slide allowed to differ, and only by its icons.\n        prompt = icons_prompt if i == 1 else unchanged_prompt\n        vlm_resp = vlm_call(prompt, [orig_img, mod_img], temperature=0.0, max_tokens=128).strip().lower()\n        if vlm_resp.startswith('yes'):\n            different_slides.append(f\"Visual difference(s) detected on slide {i + 1} (modified): {vlm_resp}\")\n\n    if different_slides:\n        return ('\\n'.join(different_slides), 0.0)\n\n    return ('No differences found', 1.0)\n"
        }
      }
    ]
  },
  "metadata": {
    "task": "Add icons or symbols inside each of the three circles in slide 2 which illustrate the respective text"
  }
}