{
  "root": {
    "name": "Format Last Three Bullets on Slide 2 as Numbered List",
    "description": "Evaluate if, on slide 2, the last three bullets describing types of worms are formatted together as a numbered list, with no extraneous changes.",
    "is_critical": true,
    "metadata": {},
    "children": [
      {
        "name": "Bullets Detailing Types of Worms Are Detected and Numbered",
        "description": "Checks that the three correct bullets are detected and now form a single numbered list on slide 2.",
        "is_critical": true,
        "metadata": {},
        "scorer": {
          "type": "function",
          "function_code": "def compute_score() -> tuple[str, float]:\n    \"\"\"Ask the VLM whether the three worm-type paragraphs on slide 2 form a numbered list.\n\n    Returns:\n        (reason, score): 1.0 when the VLM answer starts with 'yes'; 0.0 when it does\n        not, or when no slide 2 screenshot is available in the modified deck.\n    \"\"\"\n    mod_img = None\n    for s in modified_ppt_screenshots:\n        if s.slide_number == 2:\n            mod_img = s.image_path\n            break\n    if not mod_img:\n        return (\"Slide 2 screenshot missing in modified.\", 0.0)\n\n    prompt = (\n        \"Carefully analyze this image of a PowerPoint slide. \"\n        \"Is it the case that the paragraphs with keywords \\\"flatworms\\\", \\\"segmented worms\\\", and \\\"roundworms\\\" are part of a numbered list? \"\n        \"If the paragraphs in question are part of a numbered list, answer YES. Otherwise (say, for example, they make up an unordered bulleted list), answer NO and briefly describe the issue.\"\n    )\n    # The reply is lower-cased before matching, so any casing of YES is accepted.\n    vlm_resp = vlm_call(prompt, [mod_img], temperature=0.0, max_tokens=128).strip().lower()\n    if vlm_resp.startswith('yes'):\n        return (\"The relevant paragraphs make up a numbered list, as required.\", 1.0)\n    else:\n        return (f\"The relevant paragraphs DO NOT make up a numbered list: {vlm_resp}\", 0.0)\n"
        },
        "score": 1.0,
        "reason": "The relevant paragraphs make up a numbered list, as required."
      },
      {
        "name": "No Extraneous Changes to Slide Content",
        "description": "Checks that no other content on slide 2 has been altered (besides the bullet formatting).",
        "is_critical": false,
        "metadata": {},
        "scorer": {
          "type": "function",
          "function_code": "def compute_score() -> tuple[str, float]:\n    \"\"\"Compare slide 2 of the original and modified decks for unintended changes.\n\n    Shape count, shape type, geometry, text, table cells, group sizes and the\n    slide background fill type are compared. Paragraph-level formatting is not\n    inspected, so the requested numbering change is not reported as a difference.\n\n    Returns:\n        (reason, score): 1.0 when nothing differs; otherwise 0.0 with up to five\n        differences joined into the reason.\n    \"\"\"\n    from pptx import Presentation\n    from pptx.enum.shapes import MSO_SHAPE_TYPE\n\n    pres_original = Presentation(original_ppt_path)\n    pres_modified = Presentation(modified_ppt_path)\n    slide_idx = 1  # zero-based index of slide 2\n    # Fail the check instead of raising IndexError when either deck has no slide 2.\n    if len(pres_original.slides) <= slide_idx or len(pres_modified.slides) <= slide_idx:\n        return ('Slide 2 is missing in the original or modified presentation.', 0.0)\n    slide_original = pres_original.slides[slide_idx]\n    slide_modified = pres_modified.slides[slide_idx]\n\n    differences = []\n\n    # Check if the number of shapes is the same\n    if len(slide_original.shapes) != len(slide_modified.shapes):\n        differences.append(f\"Shape count mismatch: original has {len(slide_original.shapes)}, modified has {len(slide_modified.shapes)}\")\n        return ('; '.join(differences), 0.0)\n\n    # Compare each shape\n    for idx, (shape_orig, shape_mod) in enumerate(zip(slide_original.shapes, slide_modified.shapes)):\n        # Check shape type\n        if shape_orig.shape_type != shape_mod.shape_type:\n            differences.append(f\"Shape {idx}: type mismatch ({shape_orig.shape_type} vs {shape_mod.shape_type})\")\n            continue\n\n        # Check shape position and size\n        if shape_orig.left != shape_mod.left:\n            differences.append(f\"Shape {idx}: left position differs ({shape_orig.left} vs {shape_mod.left})\")\n        if shape_orig.top != shape_mod.top:\n            differences.append(f\"Shape {idx}: top position differs ({shape_orig.top} vs {shape_mod.top})\")\n        if shape_orig.width != shape_mod.width:\n            differences.append(f\"Shape {idx}: width differs ({shape_orig.width} vs {shape_mod.width})\")\n        if shape_orig.height != shape_mod.height:\n            differences.append(f\"Shape {idx}: height differs ({shape_orig.height} vs {shape_mod.height})\")\n\n        # Check text content if shape has text\n        if shape_orig.has_text_frame and shape_mod.has_text_frame:\n            text_orig = shape_orig.text_frame.text\n            text_mod = shape_mod.text_frame.text\n            if text_orig != text_mod:\n                differences.append(f\"Shape {idx}: text differs ('{text_orig}' vs '{text_mod}')\")\n\n            # Check paragraph count\n            if len(shape_orig.text_frame.paragraphs) != len(shape_mod.text_frame.paragraphs):\n                differences.append(f\"Shape {idx}: paragraph count differs\")\n        elif shape_orig.has_text_frame != shape_mod.has_text_frame:\n            differences.append(f\"Shape {idx}: text frame presence mismatch\")\n\n        # Check if shape is a table\n        if shape_orig.shape_type == MSO_SHAPE_TYPE.TABLE and shape_mod.shape_type == MSO_SHAPE_TYPE.TABLE:\n            table_orig = shape_orig.table\n            table_mod = shape_mod.table\n\n            if len(table_orig.rows) != len(table_mod.rows):\n                differences.append(f\"Shape {idx}: table row count differs ({len(table_orig.rows)} vs {len(table_mod.rows)})\")\n            if len(table_orig.columns) != len(table_mod.columns):\n                differences.append(f\"Shape {idx}: table column count differs ({len(table_orig.columns)} vs {len(table_mod.columns)})\")\n\n            # Compare table cell contents\n            for row_idx, (row_orig, row_mod) in enumerate(zip(table_orig.rows, table_mod.rows)):\n                for col_idx, (cell_orig, cell_mod) in enumerate(zip(row_orig.cells, row_mod.cells)):\n                    if cell_orig.text != cell_mod.text:\n                        differences.append(f\"Shape {idx}: table cell ({row_idx},{col_idx}) text differs ('{cell_orig.text}' vs '{cell_mod.text}')\")\n\n        # Check if shape is a group\n        if shape_orig.shape_type == MSO_SHAPE_TYPE.GROUP and shape_mod.shape_type == MSO_SHAPE_TYPE.GROUP:\n            if len(shape_orig.shapes) != len(shape_mod.shapes):\n                differences.append(f\"Shape {idx}: group shape count differs ({len(shape_orig.shapes)} vs {len(shape_mod.shapes)})\")\n\n    # Check the slide background once per slide rather than once per shape, so a\n    # mismatch is reported a single time and is not skipped by the `continue` above.\n    if hasattr(slide_original, 'background') and hasattr(slide_modified, 'background'):\n        if slide_original.background.fill.type != slide_modified.background.fill.type:\n            differences.append(\"Slide background fill type differs\")\n\n    if differences:\n        return ('; '.join(differences[:5]), 0.0)  # Return first 5 differences to avoid overly long messages\n\n    return ('No differences found', 1.0)\n"
        },
        "score": 1.0,
        "reason": "No differences found"
      },
      {
        "name": "No Extraneous Changes to Other Slides",
        "description": "Verifies that slides other than slide 2 remain unchanged.",
        "is_critical": false,
        "metadata": {},
        "scorer": {
          "type": "function",
          "function_code": "def compute_score() -> tuple[str, float]:\n    \"\"\"Score how many slides other than slide 2 were left untouched.\n\n    Returns:\n        (reason, score): 0.0 if slides were added or removed; otherwise\n        1 - (modified other slides / other slides), where 1.0 means no slide\n        other than slide 2 was modified.\n    \"\"\"\n    if ppt_diff.added_slides or ppt_diff.removed_slides:\n        return (\"Slides were added or removed.\", 0.0)\n\n    # Count modified slides other than slide 2.\n    total = sum(1 for s1, _ in ppt_diff.modified_slides if s1.slide_number != 2)\n    if total == 0:\n        return (\"No extraneous changes to other slides.\", 1.0)\n\n    # Bound the denominator below by `total`: this avoids ZeroDivisionError when only\n    # one screenshot exists and keeps the score within [0, 1] if the screenshot list\n    # is shorter than the number of modified slides.\n    other_slides = max(len(original_ppt_screenshots) - 1, total)\n    score = 1 - (total / other_slides)\n    return (\"Some other slides were modified.\", score)\n"
        },
        "score": 1.0,
        "reason": "No extraneous changes to other slides."
      }
    ],
    "score": 1.0,
    "reason": "The criterion received a perfect score because all required formatting changes were successfully implemented. The three bullets describing types of worms on slide 2 were correctly identified and converted into a numbered list format as requested. Additionally, the task was completed cleanly with no unwanted modifications made to other content on slide 2 or to any other slides in the presentation."
  },
  "metadata": {
    "task": "In slide 2, format the last three bullets which detail the types of worms as a numbered list"
  }
}