{
  "root": {
    "name": "Change the background color of Slide 3 to light blue",
    "description": "Evaluate whether Slide 3's background color was changed to light blue, with no extraneous changes or errors.",
    "is_critical": true,
    "metadata": {},
    "children": [
      {
        "name": "Slide 3 background color is set to light blue",
        "description": "Checks if the background color of Slide 3 has been set to a light blue shade.",
        "is_critical": true,
        "metadata": {},
        "scorer": {
          "type": "function",
          "function_code": "def compute_score() -> tuple[str, float]:\n    from pptx import Presentation\n    from pptx.dml.color import RGBColor\n    import os\n    # Define what is considered 'light blue' in RGB\n    LIGHT_BLUE_RGB = [\n        (173, 216, 230), # LightBlue\n        (135, 206, 250), # LightSkyBlue\n        (176, 224, 230), # PowderBlue\n        (0, 191, 255),   # DeepSkyBlue\n        (135, 206, 235)  # SkyBlue\n    ]\n    TOLERANCE = 20 # Accept a range for minor agent imprecision\n    \n    if not os.path.exists(modified_ppt_path):\n        return (\"Modified PPT file not found.\", 0.0)\n    try:\n        prs = Presentation(modified_ppt_path)\n        slide = prs.slides[2] # slide 3 is index 2\n        bg = slide.background\n        # Try to get fill color\n        if not bg.fill or bg.fill.type != 1:\n            return (\"Slide 3 background does not have a solid fill.\", 0.0)\n        color = bg.fill.fore_color.rgb\n        if color is None:\n            return (\"Slide 3 background color not detected.\", 0.0)\n        r, g, b = color[0], color[1], color[2]\n        for lbr, lbg, lbb in LIGHT_BLUE_RGB:\n            if abs(r-lbr)<=TOLERANCE and abs(g-lbg)<=TOLERANCE and abs(b-lbb)<=TOLERANCE:\n                return (\"Slide 3 background color is light blue.\", 1.0)\n        return (f\"Slide 3 background color is not light blue. Detected RGB: ({r},{g},{b})\", 0.0)\n    except Exception as e:\n        return (f\"Error checking background color: {e}\", 0.0)\n"
        },
        "score": 1.0
      },
      {
        "name": "No Unwanted Changes",
        "description": "Checks that there are no extraneous changes to slide 3 except the intended subtitle addition.",
        "is_critical": false,
        "metadata": {},
        "scorer": {
          "type": "function",
          "function_code": "def compute_score() -> tuple[str, float]:\n    orig_img = original_ppt_screenshots[2].image_path\n    mod_img = modified_ppt_screenshots[2].image_path\n\n    prompt = (\n        \"Compare these two images of the same PowerPoint slide. Besides the fact that the second slide has a blue background, there should be no difference between the slide images. \"\n        \"Besides the difference in background color, are there *any* visible changes to the slide's content, layout, or appearance (such as text, images, shapes, or formatting)? \"\n        \"If there are no visible differences besides the background color, answer NO. Otherwise, answer YES and briefly describe the differences. Take your time, analyze the slides, and compare them carefully.\"\n    )\n\n    vlm_resp = vlm_call(prompt, [orig_img, mod_img], temperature=0.0, max_tokens=128).strip().lower()\n    if vlm_resp.startswith('yes'):\n        return (f'Extraneous differences found between slides: {vlm_resp}', 0.0)\n\n    return ('No differences found', 1.0)\n"
        },
        "score": 1.0
      },
      {
        "name": "No Extraneous Changes to Other Slides",
        "description": "Checks that there are no extraneous changes other slides.",
        "is_critical": false,
        "metadata": {},
        "scorer": {
          "type": "function",
          "function_code": "def compute_score() -> tuple[str, float]:\n    if len(ppt_diff.modified_slides) > 1 or ppt_diff.modified_slides[0][0].slide_number != 3:\n        return 'Undesired slide modifications', 0.0\n    \n    if len(ppt_diff.added_slides) > 0:\n        return 'Undesired slide additions', 0.0\n    \n    if len(ppt_diff.removed_slides) > 0:\n        return 'Undesired slide removals', 0.0\n    \n    if len(ppt_diff.modified_animations) > 0 or len(ppt_diff.modified_transitions) > 0:\n        return 'Undesired animation/transition additions', 0.0\n        \n        \n    if len(ppt_diff.added_animations) > 0 or len(ppt_diff.added_transitions) > 0:\n        return 'Undesired animation/transition additions', 0.0\n\n    if len(ppt_diff.removed_animations) > 0 or len(ppt_diff.removed_transitions) > 0:\n        return 'Undesired animation/transition removals', 0.0\n    \n    return 'No extraneous changes.', 1.0\n"
        },
        "score": 1.0
      }
    ],
    "score": 1.0
  },
  "metadata": {
    "task": "Slide 3: Change the background color of the slide to light blue"
  }
}