{
  "root": {
    "name": "Add and Position Text Box Explaining 'Prostomium' on Slide 4",
    "description": "Evaluates whether the agent correctly adds a text box to Slide 4 with an explanation of 'prostomium' and positions it above the word 'prostomium' in the diagram, with no extraneous changes.",
    "is_critical": true,
    "metadata": {},
    "children": [
      {
        "name": "Presence of Added Text Box",
        "description": "Verifies that a text box was added to Slide 4.",
        "is_critical": true,
        "metadata": {},
        "scorer": {
          "type": "function",
          "function_code": "def compute_score() -> tuple[str, float]:\n    \"\"\"Return 1.0 when Slide 4 of the modified deck has a text box whose text is absent from the original Slide 4.\"\"\"\n    from pptx import Presentation\n    # Slide 4 must be an edited slide, not one reported as newly added.\n    added = [s for s in ppt_diff.added_slides if s.slide_number == 4]\n    if added:\n        return \"Slide 4 was added, not modified. Should not be the case.\", 0.0\n    pres = Presentation(modified_ppt_path)\n    orig_pres = Presentation(original_ppt_path)\n    # Guard the index lookups below so a short deck scores 0 instead of raising IndexError.\n    if len(pres.slides) < 4 or len(orig_pres.slides) < 4:\n        return \"Slide 4 is missing from the original or modified presentation.\", 0.0\n    slide = pres.slides[3]  # 0-indexed\n    orig_slide = orig_pres.slides[3]\n    # Collect the original texts once instead of re-opening the original deck for every text box.\n    orig_texts = set(s.text.strip() for s in orig_slide.shapes if hasattr(s, 'text'))\n    for shape in slide.shapes:\n        if shape.shape_type != 17:  # 17 == MSO_SHAPE_TYPE.TEXT_BOX\n            continue\n        # A text box counts as new when its non-empty text is not on the original slide.\n        if shape.has_text_frame and shape.text.strip() and shape.text.strip() not in orig_texts:\n            return \"A new text box was added to Slide 4.\", 1.0\n    return \"No new text box added to Slide 4.\", 0.0\n"
        },
        "score": 1.0
      },
      {
        "name": "Text Box Content is an Explanation, Not Just the Word",
        "description": "Ensures the new text box includes an explanation or definition of 'prostomium' rather than just the term itself.",
        "is_critical": true,
        "metadata": {},
        "scorer": {
          "type": "function",
          "function_code": "def compute_score() -> tuple[str, float]:\n    \"\"\"Return 1.0 when a new text box on Slide 4 explains 'prostomium' rather than only naming it.\"\"\"\n    from pptx import Presentation\n    import re\n    pres = Presentation(modified_ppt_path)\n    orig_pres = Presentation(original_ppt_path)\n    # Guard the index lookups below so a short deck scores 0 instead of raising IndexError.\n    if len(pres.slides) < 4 or len(orig_pres.slides) < 4:\n        return \"Slide 4 is missing from the original or modified presentation.\", 0.0\n    slide = pres.slides[3]\n    orig_slide = orig_pres.slides[3]\n    # Texts already on the original slide 4; a text box whose text is in this set is not new.\n    orig_texts = set(s.text.strip() for s in orig_slide.shapes if hasattr(s, 'text'))\n    # Definition cue words, matched as whole words (plural nouns allowed). A plain substring test\n    # would let 'is' match inside 'this' and 'part' inside 'apart'.\n    cue_words = re.compile(r'\\b(?:is|means|refers|parts?|regions?|structures?|anterior|front)\\b', re.IGNORECASE)\n    for shape in slide.shapes:\n        if shape.shape_type != 17 or not shape.has_text_frame:  # 17 == MSO_SHAPE_TYPE.TEXT_BOX\n            continue\n        txt = shape.text.strip()\n        if not txt or txt in orig_texts:\n            continue\n        if re.search(r'prostomium', txt, re.IGNORECASE):\n            # Needs more than the bare word: a short phrase that also carries a definition cue.\n            if len(txt.split()) > 2 and cue_words.search(txt):\n                return \"Text box contains an explanation of 'prostomium'.\", 1.0\n        elif len(txt) > 20:\n            # New text longer than 20 characters that does not repeat the word is accepted as a definition.\n            return \"Text box contains a definition/explanation without repeating 'prostomium'.\", 1.0\n    return \"No explanation of 'prostomium' found in added text box.\", 0.0\n"
        },
        "score": 1.0
      },
      {
        "name": "Text Box Relative Position to Diagram Word",
        "description": "Ensures the new text box is above the occurrence of 'prostomium' in the diagram on Slide 4.",
        "is_critical": true,
        "metadata": {},
        "scorer": {
          "type": "function",
          "function_code": "def compute_score() -> tuple[str, float]:\n    \"\"\"Ask the VLM whether the explanation text box sits above the word 'prostomium' on the Slide 4 screenshot.\"\"\"\n    # Only the modified deck's screenshot of Slide 4 is inspected; the first matching entry wins.\n    mod_img = next((s.image_path for s in modified_ppt_screenshots if s.slide_number == 4), None)\n    if not mod_img:\n        return (\"Slide 4 screenshot missing in modified presentation.\", 0.0)\n    prompt = (\n        \"Carefully analyze this image of a PowerPoint slide. It should contain a diagram of a worm with labelled body parts, along with a textbox explaining what the 'prostomium' of a worm is. \"\n        \"If the explanation of the 'prostomium' exists, is the textbox with the explanation above the word 'prostomium' in the diagram? \"\n        \"If the textbox is above the word and not blocking any text or part of the diagram, answer YES. \"\n        \"If the textbox is not above the word or the explanation does not exist, answer NO. \"\n        \"If the textbox is blocking any other text or part of the diagram, answer BLOCKING and briefly describe what is blocked. \"\n        \"Output only YES, NO, or BLOCKING as the first word, followed by a brief explanation.\"\n    )\n    vlm_resp = vlm_call(prompt, [mod_img], temperature=0.0, max_tokens=128).strip().lower()\n    # The prompt requests YES / NO / BLOCKING as the first word; the reply is lower-cased before matching.\n    if vlm_resp.startswith('yes'):\n        return (\"The explanation of the 'prostomium' is above the word in the diagram, as required.\", 1.0)\n    elif vlm_resp.startswith('blocking'):\n        # Positioned but overlapping other content earns half credit.\n        return (f\"The textbox is blocking part of the diagram or text: {vlm_resp}\", 0.5)\n    else:\n        return (f\"There is an issue with the explanation of the 'prostomium': {vlm_resp}\", 0.0)\n"
        },
        "score": 1.0
      },
      {
        "name": "No Unrelated Slides or Content Modified",
        "description": "Ensures that only the required addition was made and no unrelated slides, shapes, or content were added, removed, or changed.",
        "is_critical": false,
        "metadata": {},
        "scorer": {
          "type": "function",
          "function_code": "def compute_score() -> tuple[str, float]:\n    \"\"\"Return 1.0 when the only detected change is exactly one new text box on Slide 4.\"\"\"\n    from pptx import Presentation\n    # No slide may be added or removed.\n    if ppt_diff.added_slides or ppt_diff.removed_slides:\n        return \"Slides were added or removed.\", 0.0\n    # Slide 4 is the only slide allowed to differ.\n    for s1, _ in ppt_diff.modified_slides:\n        if s1.slide_number != 4:\n            return f\"Slide {s1.slide_number} was modified but shouldn't have been.\", 0.0\n    pres = Presentation(modified_ppt_path)\n    orig_pres = Presentation(original_ppt_path)\n    # Guard the index lookups below so a short deck scores 0 instead of raising IndexError.\n    if len(pres.slides) < 4 or len(orig_pres.slides) < 4:\n        return \"Slide 4 is missing from the original or modified presentation.\", 0.0\n    slide = pres.slides[3]\n    orig_slide = orig_pres.slides[3]\n    orig_texts = set(s.text.strip() for s in orig_slide.shapes if hasattr(s, 'text'))\n    # New text boxes: type 17 (MSO_SHAPE_TYPE.TEXT_BOX) with non-empty text not present on the original slide.\n    new_textboxes = [s for s in slide.shapes if s.shape_type == 17 and s.has_text_frame and s.text.strip() and s.text.strip() not in orig_texts]\n    if len(new_textboxes) != 1:\n        return f\"Number of new text boxes on Slide 4 is {len(new_textboxes)} (should be 1).\", 0.0\n    # Animations and transitions must be untouched.\n    if ppt_diff.added_animations or ppt_diff.removed_animations or ppt_diff.modified_animations:\n        return \"Animations were changed.\", 0.0\n    if ppt_diff.added_transitions or ppt_diff.removed_transitions or ppt_diff.modified_transitions:\n        return \"Transitions were changed.\", 0.0\n    return \"No extraneous changes detected.\", 1.0\n"
        },
        "score": 1.0
      }
    ],
    "score": 1.0
  },
  "metadata": {
    "task": "Add a text box in Slide 4 explaining what 'prostomium' means and position it above the word 'prostomium' on the diagram"
  }
}