{
  "root": {
    "name": "Add Team Member to Contributors Slide",
    "description": "Evaluates whether 'Dr. Sarah Johnson' with role 'Pediatric Ophthalmologist' was successfully added to slide 1 (contributors slide) without causing unintended changes",
    "is_critical": false,
    "metadata": {},
    "children": [
      {
        "name": "Core Task Completion",
        "description": "Verifies that the specified team member was correctly added to slide 1 with proper name and role",
        "is_critical": true,
        "metadata": {},
        "children": [
          {
            "name": "Team Member Added to Slide 1",
            "description": "Checks if 'Dr. Sarah Johnson' was added to slide 1 (contributors slide)",
            "is_critical": true,
            "metadata": {},
            "scorer": {
              "type": "function",
              "function_code": "def compute_score() -> tuple[str, float]:\n    \"\"\"Return 1.0 when 'Sarah Johnson' appears in the text of slide 1, else 0.0.\n\n    Opens the presentation at the injected global modified_ppt_path. Text is\n    gathered from top-level shapes, grouped shapes and table cells, then\n    whitespace-normalised so a name broken across lines still matches.\n    \"\"\"\n    from pptx import Presentation\n\n    def _collect_text(shapes):\n        # Group shapes and table frames carry no .text of their own, so their\n        # child shapes and cells are visited explicitly.\n        parts = []\n        for shape in shapes:\n            if hasattr(shape, 'text'):\n                parts.append(shape.text)\n            if getattr(shape, 'has_table', False):\n                for row in shape.table.rows:\n                    for cell in row.cells:\n                        parts.append(cell.text)\n            if hasattr(shape, 'shapes'):\n                parts.extend(_collect_text(shape.shapes))\n        return parts\n\n    try:\n        prs = Presentation(modified_ppt_path)\n        if len(prs.slides) == 0:\n            return \"No slides found in presentation\", 0.0\n\n        slide = prs.slides[0]  # Slide 1 (index 0)\n        # split() + join collapses newlines, vertical tabs and repeated spaces.\n        slide_text = ' '.join(' '.join(_collect_text(slide.shapes)).lower().split())\n\n        # The bare name is accepted, so a missing 'Dr.' title does not fail the check.\n        if \"sarah johnson\" in slide_text:\n            return \"Dr. Sarah Johnson found on slide 1\", 1.0\n        return \"Dr. Sarah Johnson not found on slide 1\", 0.0\n\n    except Exception as e:\n        return f\"Error checking slide content: {str(e)}\", 0.0\n"
            }
          },
          {
            "name": "Correct Role Assignment",
            "description": "Verifies that 'Pediatric Ophthalmologist' role is associated with Dr. Sarah Johnson",
            "is_critical": true,
            "metadata": {},
            "scorer": {
              "type": "function",
              "function_code": "def compute_score() -> tuple[str, float]:\n    \"\"\"Score whether both the name and the role text appear on slide 1.\n\n    Opens the presentation at the injected global modified_ppt_path. Returns\n    1.0 when both are present, 0.5 when only the name is present, else 0.0.\n    Only co-occurrence on the slide is checked, not adjacency of name and role.\n    \"\"\"\n    from pptx import Presentation\n\n    def _collect_text(shapes):\n        # Group shapes and table frames carry no .text of their own, so their\n        # child shapes and cells are visited explicitly.\n        parts = []\n        for shape in shapes:\n            if hasattr(shape, 'text'):\n                parts.append(shape.text)\n            if getattr(shape, 'has_table', False):\n                for row in shape.table.rows:\n                    for cell in row.cells:\n                        parts.append(cell.text)\n            if hasattr(shape, 'shapes'):\n                parts.extend(_collect_text(shape.shapes))\n        return parts\n\n    try:\n        prs = Presentation(modified_ppt_path)\n        if len(prs.slides) == 0:\n            return \"No slides found in presentation\", 0.0\n\n        slide = prs.slides[0]  # Slide 1 (index 0)\n        # split() + join collapses newlines, vertical tabs and repeated spaces.\n        slide_text = ' '.join(' '.join(_collect_text(slide.shapes)).lower().split())\n\n        has_name = \"sarah johnson\" in slide_text\n        has_role = \"pediatric ophthalmologist\" in slide_text\n\n        if has_name and has_role:\n            return \"Both Dr. Sarah Johnson and Pediatric Ophthalmologist found on slide 1\", 1.0\n        if has_name:\n            return \"Dr. Sarah Johnson found but role 'Pediatric Ophthalmologist' missing\", 0.5\n        if has_role:\n            # Role without the name still scores 0.0, but the message must not\n            # claim that the role is absent.\n            return \"Role 'Pediatric Ophthalmologist' found but Dr. Sarah Johnson missing from slide 1\", 0.0\n        return \"Neither name nor role found on slide 1\", 0.0\n\n    except Exception as e:\n        return f\"Error checking role assignment: {str(e)}\", 0.0\n"
            }
          }
        ]
      },
      {
        "name": "Visual Integration",
        "description": "Assesses if the new team member is visually integrated well with existing contributors using a VLM review of the modified slide 1 screenshot",
        "is_critical": false,
        "metadata": {},
        "scorer": {
          "type": "function",
          "function_code": "def compute_score() -> tuple[str, float]:\n    \"\"\"Rate how well the new contributor entry fits slide 1, using the VLM.\n\n    Relies on the injected globals modified_ppt_screenshots and vlm_call.\n    Returns (message, score) with score clamped to [0.0, 1.0]. A reply that\n    cannot be parsed, or a failed VLM call, yields the neutral score 0.5.\n    \"\"\"\n    if not modified_ppt_screenshots or len(modified_ppt_screenshots) == 0:\n        return \"No modified screenshots available\", 0.0\n\n    # Find slide 1 screenshot\n    slide1_screenshot = None\n    for screenshot in modified_ppt_screenshots:\n        if screenshot.slide_number == 1:\n            slide1_screenshot = screenshot\n            break\n\n    if not slide1_screenshot:\n        return \"Slide 1 screenshot not found\", 0.0\n\n    prompt = \"\"\"Analyze this PowerPoint slide that should be a contributors/team members slide. \n    Look for 'Dr. Sarah Johnson' with role 'Pediatric Ophthalmologist'. \n    Evaluate how well this new team member is visually integrated:\n    \n    1. Is the formatting consistent with other team members?\n    2. Is the layout appropriate and not disrupting the overall design?\n    3. Does it look professionally integrated?\n    \n    Rate the visual integration quality from 0.0 to 1.0 where:\n    - 1.0 = Perfectly integrated, consistent formatting, professional appearance\n    - 0.7-0.9 = Well integrated with minor formatting inconsistencies\n    - 0.4-0.6 = Adequately integrated but noticeable formatting issues\n    - 0.1-0.3 = Poorly integrated, major formatting problems\n    - 0.0 = Not integrated or severely disrupts slide layout\n    \n    Respond with just the score (e.g., 0.8) followed by a brief explanation.\"\"\"\n\n    try:\n        response = vlm_call(prompt, [slide1_screenshot.image_path], temperature=0.3)\n\n        # Robust score extraction supporting forms like: 0.8, 80%, 8/10, Score: 0.8\n        def _extract_score(text):\n            # Returns a float in [0, 1], or None when no score can be found.\n            # Deliberately unannotated: a 'float | None' annotation is evaluated\n            # when this def runs and raises TypeError before Python 3.10, which\n            # the except below would turn into a silent 0.5.\n            import re\n            if not text:\n                return None\n            t = text.strip()\n            # 0) The prompt asks for the score first, so a number that opens the\n            #    reply wins over any figure quoted later in the explanation\n            #    (e.g. '0.8 - about 100% consistent' must score 0.8, not 1.0).\n            lead = re.match(r\"\\W*(?:(?:score|rating)\\W*)?(\\d+(?:\\.\\d+)?)\\s*(%|/\\s*\\d+(?:\\.\\d+)?)?\", t, re.IGNORECASE)\n            if lead:\n                lead_num = lead.group(1)\n                suffix = lead.group(2)\n                val = float(lead_num)\n                if suffix == '%':\n                    return max(0.0, min(1.0, val / 100.0))\n                if suffix:\n                    den = float(suffix[1:])\n                    if den != 0:\n                        return max(0.0, min(1.0, val / den))\n                elif 0.0 <= val <= 1.0:\n                    # A bare integer followed by '.' or ')' is a list marker, not a score.\n                    is_list_marker = '.' not in lead_num and t[lead.end():lead.end() + 1] in ('.', ')')\n                    if not is_list_marker:\n                        return val\n            # Fallbacks for replies that do not open with a usable score.\n            # 1) Percentage like 85%\n            m = re.search(r\"(\\d+(?:\\.\\d+)?)\\s*%\", t)\n            if m:\n                val = float(m.group(1)) / 100.0\n                return max(0.0, min(1.0, val))\n            # 2) Fraction like 8/10 or 0.8/1.0\n            m = re.search(r\"(\\d+(?:\\.\\d+)?)\\s*/\\s*(\\d+(?:\\.\\d+)?)\", t)\n            if m:\n                num = float(m.group(1))\n                den = float(m.group(2))\n                if den != 0:\n                    val = num / den\n                    return max(0.0, min(1.0, val))\n            # 3) Labeled score \"score: 0.8\"\n            m = re.search(r\"score[^\\d]*(\\d+(?:\\.\\d+)?)\", t, re.IGNORECASE)\n            if m:\n                val = float(m.group(1))\n                if val > 1.0 and val <= 100.0:\n                    val = val / 100.0\n                return max(0.0, min(1.0, val))\n            # 4) Any standalone float; prefer first in [0,1]\n            nums = re.findall(r\"\\d+(?:\\.\\d+)?\", t)\n            for n in nums:\n                try:\n                    v = float(n)\n                except Exception:\n                    continue\n                if 0.0 <= v <= 1.0:\n                    return v\n            # 5) If we saw a number <= 100, treat as percent\n            for n in nums:\n                try:\n                    v = float(n)\n                except Exception:\n                    continue\n                if 0.0 <= v <= 100.0:\n                    return max(0.0, min(1.0, v / 100.0))\n            return None\n\n        score = _extract_score(response)\n        if score is None:\n            return f\"Could not parse score from response: {response}\", 0.5\n        # Clamp and craft explanation\n        score = max(0.0, min(1.0, score))\n        lines = response.strip().split('\\n')\n        explanation = ' '.join(lines[1:]).strip() if len(lines) > 1 else \"Visual integration assessed\"\n        if not explanation:\n            explanation = \"Visual integration assessed\"\n        return f\"Visual integration score: {score} - {explanation}\", score\n\n    except Exception as e:\n        return f\"Error in visual assessment: {str(e)}\", 0.5\n"
        }
      },
      {
        "name": "No Unintended Changes",
        "description": "Ensures that no unintended modifications were made to other slides or slide elements",
        "is_critical": false,
        "metadata": {},
        "children": [
          {
            "name": "Other Slides Unchanged",
            "description": "Verifies that slides other than slide 1 were not modified",
            "is_critical": false,
            "metadata": {},
            "scorer": {
              "type": "function",
              "function_code": "def compute_score() -> tuple[str, float]:\n    \"\"\"Penalise slide-level changes outside slide 1.\n\n    Reads the injected global ppt_diff. Every modified slide pair in which\n    neither side is slide 1, plus every added or removed slide, costs 0.2;\n    the score bottoms out at 0.0.\n    \"\"\"\n    # A pair is off-target only when neither its old nor its new slide is number 1.\n    off_target = [\n        (before, after)\n        for before, after in ppt_diff.modified_slides\n        if not (before.slide_number == 1 or after.slide_number == 1)\n    ]\n    n_modified = len(off_target)\n    n_added = len(ppt_diff.added_slides)\n    n_removed = len(ppt_diff.removed_slides)\n    n_total = n_modified + n_added + n_removed\n\n    if n_total == 0:\n        return \"No unintended changes to other slides detected\", 1.0\n\n    deduction = min(1.0, n_total * 0.2)  # 0.2 per unintended change\n    message = f\"Found {n_total} unintended slide changes (modified: {n_modified}, added: {n_added}, removed: {n_removed})\"\n    return message, max(0.0, 1.0 - deduction)\n"
            }
          },
          {
            "name": "No Unintended Animations or Transitions",
            "description": "Checks that no animations or transitions were unintentionally added, removed, or modified",
            "is_critical": false,
            "metadata": {},
            "scorer": {
              "type": "function",
              "function_code": "def compute_score() -> tuple[str, float]:\n    \"\"\"Penalise animation and transition differences reported by ppt_diff.\n\n    Reads the injected global ppt_diff. Each added, removed or modified\n    animation or transition costs 0.1, with the score floored at 0.0.\n    \"\"\"\n    animation_changes = sum(\n        len(bucket)\n        for bucket in (\n            ppt_diff.added_animations,\n            ppt_diff.removed_animations,\n            ppt_diff.modified_animations,\n        )\n    )\n    transition_changes = sum(\n        len(bucket)\n        for bucket in (\n            ppt_diff.added_transitions,\n            ppt_diff.removed_transitions,\n            ppt_diff.modified_transitions,\n        )\n    )\n    total_changes = animation_changes + transition_changes\n\n    if total_changes == 0:\n        return \"No unintended animation or transition changes detected\", 1.0\n\n    deduction = min(1.0, total_changes * 0.1)  # 0.1 per change\n    message = f\"Found {total_changes} unintended animation/transition changes (animations: {animation_changes}, transitions: {transition_changes})\"\n    return message, max(0.0, 1.0 - deduction)\n"
            }
          }
        ]
      }
    ]
  },
  "metadata": {
    "task": "Add a new team member to the contributors slide (slide 1) with the name 'Dr. Sarah Johnson' and role 'Pediatric Ophthalmologist'",
    "compute_strategy": "default",
    "non_critical_weight": 0.3
  }
}