{
  "root": {
    "name": "Replace Slide 11 diagram with a simpler flowchart: 'Browser → HTTP Request → Web Server → HTTP Response → Browser'",
    "description": "Evaluate whether the agent replaced the existing diagram on Slide 11 with a simpler flowchart showing the specified sequence.",
    "is_critical": true,
    "metadata": {},
    "children": [
      {
        "name": "Existing diagram on Slide 11 is removed",
        "description": "Checks that the original diagram on slide 11 is deleted or replaced.",
        "is_critical": true,
        "metadata": {},
        "scorer": {
          "type": "function",
          "function_code": "def compute_score() -> tuple[str, float]:\n    # Check for removed shapes or images on slide 11\n\n    # Use vision model to check for removal\n    try:\n        orig_img = [s.image_path for s in original_ppt_screenshots if s.slide_number == 11][0]\n        mod_img = [s.image_path for s in modified_ppt_screenshots if s.slide_number == 11][0]\n        prompt = \"Has the main diagram or figure on the slide been removed or replaced? Reply Yes or No.\"\n        result = vlm_call(prompt, [orig_img, mod_img], temperature=0.0, max_tokens=5)\n        if 'yes' in result.lower():\n            return (\"Diagram removed or replaced as required.\", 1.0)\n        else:\n            return (\"Diagram not removed or replaced.\", 0.0)\n    except Exception as e:\n        return (f\"Unable to verify diagram removal: {e}\", 0.0)\n"
        },
        "score": 1.0
      },
      {
        "name": "Flowchart with correct sequence is present on Slide 11",
        "description": "Checks that a flowchart with the exact sequence 'Browser → HTTP Request → Web Server → HTTP Response → Browser' is present on Slide 11.",
        "is_critical": true,
        "metadata": {},
        "scorer": {
          "type": "function",
          "function_code": "def compute_score() -> tuple[str, float]:\n    # Use VLM to check if new diagram is a flowchart with the specified sequence\n    try:\n        mod_img = [s.image_path for s in modified_ppt_screenshots if s.slide_number == 11][0]\n    except IndexError:\n        return (\"Modified slide 11 screenshot not found.\", 0.0)\n    prompt = (\n        \"Does this slide contain a flowchart or diagram that shows the following sequence, in order:\\n\"\n        \"Browser → HTTP Request → Web Server → HTTP Response → Browser\\n\"\n        \"Reply Yes if the flowchart with these steps and arrows in this order is present. Otherwise, reply No.\"\n    )\n    result = vlm_call(prompt, [mod_img], temperature=0.0, max_tokens=5)\n    if 'yes' in result.lower():\n        return (\"Correct flowchart with specified sequence is present.\", 1.0)\n    return (\"Correct flowchart is not present.\", 0.0)\n"
        },
        "score": 1.0
      },
      {
        "name": "Flowchart is simple (not overly complex)",
        "description": "Checks that the flowchart is simple and does not contain extraneous steps or elements.",
        "is_critical": false,
        "metadata": {},
        "scorer": {
          "type": "function",
          "function_code": "def compute_score() -> tuple[str, float]:\n    # Use VLM to check for simplicity\n    try:\n        mod_img = [s.image_path for s in modified_ppt_screenshots if s.slide_number == 11][0]\n    except IndexError:\n        return (\"Modified slide 11 screenshot not found.\", 0.0)\n    prompt = (\n        \"Is the flowchart or diagram on this slide simple, showing ONLY the steps: \"\n        \"Browser, HTTP Request, Web Server, HTTP Response, Browser, without extra steps or decorations? \"\n        \"Reply Yes if it is simple, No if there are extra elements or steps.\"\n    )\n    result = vlm_call(prompt, [mod_img], temperature=0.0, max_tokens=5)\n    if 'yes' in result.lower():\n        return (\"Flowchart is simple as required.\", 1.0)\n    return (\"Flowchart contains extraneous elements or is too complex.\", 0.0)\n"
        },
        "score": 1.0
      },
      {
        "name": "No extraneous slides added or removed",
        "description": "Checks that no slides were added or removed in the presentation.",
        "is_critical": false,
        "metadata": {},
        "scorer": {
          "type": "function",
          "function_code": "def compute_score() -> tuple[str, float]:\n    # Use ppt_diff for slide structure changes\n    if ppt_diff.added_slides:\n        return (\"Slides were added, which is extraneous.\", 0.0)\n    if ppt_diff.removed_slides:\n        return (\"Slides were removed, which is extraneous.\", 0.0)\n    return (\"No extraneous slides added or removed.\", 1.0)\n"
        },
        "score": 1.0
      },
      {
        "name": "No extraneous changes to other slides or features",
        "description": "Checks that no unrelated changes were made to slides other than slide 11, or to slide 11's animations/transitions or other features.",
        "is_critical": false,
        "metadata": {},
        "children": [
          {
            "name": "No extraneous text/font changes on other slides",
            "description": "Checks that text and font properties on slides other than slide 11 were not changed.",
            "is_critical": false,
            "metadata": {},
            "scorer": {
              "type": "function",
              "function_code": "def compute_score() -> tuple[str, float]:\n    from pptx import Presentation\n    orig = Presentation(original_ppt_path)\n    mod = Presentation(modified_ppt_path)\n    # For each slide except slide 11\n    for slide_idx in range(len(orig.slides)):\n        if slide_idx == 10:\n            continue\n        orig_slide = orig.slides[slide_idx]\n        mod_slide = mod.slides[slide_idx] if slide_idx < len(mod.slides) else None\n        if not mod_slide:\n            continue  # Slide might be deleted, not expected for this task\n        def extract_text_fontsize(shape):\n            data = []\n            if not shape.has_text_frame:\n                return data\n            for para in shape.text_frame.paragraphs:\n                for run in para.runs:\n                    if run.text.strip():\n                        size = run.font.size.pt if run.font.size else None\n                        data.append((run.text.strip(), size))\n            return data\n        def get_slide_text_fontsize(slide):\n            out = []\n            for shape in slide.shapes:\n                out.extend(extract_text_fontsize(shape))\n            return out\n        orig_text_sizes = get_slide_text_fontsize(orig_slide)\n        mod_text_sizes = get_slide_text_fontsize(mod_slide)\n        orig_map = {}\n        for t, sz in orig_text_sizes:\n            orig_map.setdefault(t, []).append(sz)\n        mod_map = {}\n        for t, sz in mod_text_sizes:\n            mod_map.setdefault(t, []).append(sz)\n        # For every text, check font sizes unchanged\n        for text in orig_map:\n            if text not in mod_map:\n                continue\n            if orig_map[text] != mod_map[text]:\n                return (f\"Font size for text '{text}' changed on slide {slide_idx+1}.\", 0.0)\n    return (\"No text/font changes on other slides.\", 1.0)\n"
            },
            "score": 1.0
          },
          {
            "name": "No extraneous changes to animations or transitions on any slide",
            "description": "Checks that no animation or transition was added, removed, or modified in the presentation.",
            "is_critical": false,
            "metadata": {},
            "scorer": {
              "type": "function",
              "function_code": "def compute_score() -> tuple[str, float]:\n    # Check ppt_diff for any animation/transition changes\n    if ppt_diff.added_animations or ppt_diff.removed_animations or ppt_diff.modified_animations:\n        return (\"Extraneous animation change(s) detected.\", 0.0)\n    if ppt_diff.added_transitions or ppt_diff.removed_transitions or ppt_diff.modified_transitions:\n        return (\"Extraneous transition change(s) detected.\", 0.0)\n    return (\"No extraneous animation or transition changes.\", 1.0)\n"
            },
            "score": 1.0
          }
        ],
        "score": 1.0
      }
    ],
    "score": 1.0
  },
  "metadata": {
    "task": "Slide 11: Replace the existing diagram with a simpler flowchart showing 'Browser → HTTP Request → Web Server → HTTP Response → Browser'"
  }
}