{
  "root": {
    "name": "Insert SmartArt graphic for accounting equation on slide 3",
    "description": "Evaluates if the agent successfully inserted a SmartArt graphic on slide 3 to visually represent the accounting equation (Assets = Capital + Liabilities) using a balance.",
    "is_critical": true,
    "metadata": {},
    "children": [
      {
        "name": "SmartArt insertion on slide 3",
        "description": "Checks that a SmartArt graphic exists on slide 3.",
        "is_critical": true,
        "metadata": {},
        "scorer": {
          "type": "function",
          "function_code": "def compute_score() -> tuple[str, float]:\n    \"\"\"Score 1.0 when slide 3 holds at least one SmartArt/diagram shape, else 0.0.\n\n    Reads the global modified_ppt_path, which is not defined in this snippet.\n    \"\"\"\n    from pptx import Presentation\n\n    # graphicData URI that marks a graphic frame as SmartArt (a DrawingML diagram)\n    DIAGRAM_URI = 'http://schemas.openxmlformats.org/drawingml/2006/diagram'\n    GRAPHIC_DATA_TAG = '{http://schemas.openxmlformats.org/drawingml/2006/main}graphicData'\n\n    def is_smartart(shape) -> bool:\n        # Strongest signal: the frame's graphicData element declares the diagram URI.\n        element = getattr(shape, '_element', None)\n        if element is not None:\n            for node in element.iter(GRAPHIC_DATA_TAG):\n                if node.get('uri') == DIAGRAM_URI:\n                    return True\n        # Name heuristic kept from the earlier check (e.g. \"Diagram 4\").\n        if 'diagram' in (getattr(shape, 'name', '') or '').lower():\n            return True\n        # python-pptx raises NotImplementedError from shape_type for shape kinds it\n        # cannot classify; treat those as not-SmartArt instead of crashing the scorer.\n        try:\n            return shape.shape_type is None\n        except NotImplementedError:\n            return False\n\n    ppt = Presentation(modified_ppt_path)\n    if len(ppt.slides) <= 2:\n        return \"Slide 3 does not exist.\", 0.0\n\n    smartart_shapes = [shape for shape in ppt.slides[2].shapes if is_smartart(shape)]\n    if smartart_shapes:\n        return f\"SmartArt/Diagram found on slide 3 (found {len(smartart_shapes)} diagram shape(s)).\", 1.0\n    return \"No SmartArt/Diagram found on slide 3.\", 0.0\n"
        }
      },
      {
        "name": "SmartArt graphic represents accounting equation as a balance",
        "description": "Checks that the inserted SmartArt visually represents Assets, Capital, and Liabilities using a balance and correct relationships.",
        "is_critical": true,
        "metadata": {},
        "children": [
          {
            "name": "Contains correct labels (Assets, Capital, Liabilities)",
            "description": "Checks if SmartArt contains labels for Assets, Capital, and Liabilities.",
            "is_critical": true,
            "metadata": {},
            "scorer": {
              "type": "function",
              "function_code": "def compute_score() -> tuple[str, float]:\n    \"\"\"Score the share of required labels (Assets, Capital, Liabilities) found on slide 3.\n\n    Text comes from ordinary text frames plus any SmartArt data part related to the\n    slide, because SmartArt graphic frames expose no text_frame in python-pptx.\n    Returns 1.0 only when all three labels are present and 0.0 when none are.\n    Reads the global modified_ppt_path, which is not defined in this snippet.\n    \"\"\"\n    import re\n    from pptx import Presentation\n\n    ppt = Presentation(modified_ppt_path)\n    if len(ppt.slides) <= 2:\n        return \"Slide 3 does not exist.\", 0.0\n    slide = ppt.slides[2]\n\n    chunks = [shape.text_frame.text for shape in slide.shapes if getattr(shape, 'has_text_frame', False)]\n\n    # SmartArt text is stored in a related diagram-data part rather than in the shape.\n    # Best effort: skip silently if this python-pptx version exposes relationships differently.\n    try:\n        for rel in slide.part.rels.values():\n            if rel.is_external or not rel.reltype.endswith('/diagramData'):\n                continue\n            xml = rel.target_part.blob.decode('utf-8', errors='ignore')\n            # assumes the conventional a: prefix for DrawingML text runs\n            chunks.extend(re.findall('<a:t>([^<]*)</a:t>', xml))\n    except AttributeError:\n        pass\n\n    all_text = ' '.join(chunks).lower()\n\n    # Required label -> substrings accepted as evidence of it.\n    required = {\n        'Assets': ('asset',),\n        'Capital': ('capital', 'equity'),\n        'Liabilities': ('liabilit',),\n    }\n    found = [label for label, stems in required.items() if any(stem in all_text for stem in stems)]\n    missing = [label for label in required if label not in found]\n\n    if not missing:\n        return \"All required labels found on slide 3 (Assets, Capital, Liabilities).\", 1.0\n    if found:\n        score = round(len(found) / len(required), 2)\n        return f\"Only some required labels found ({', '.join(found)}); missing: {', '.join(missing)}.\", score\n    return \"None of the required labels (Assets, Capital, Liabilities) found on slide 3.\", 0.0\n"
            }
          },
          {
            "name": "SmartArt structure reflects a balance relationship",
            "description": "Checks if the SmartArt visually sets up a relationship showing Assets = Capital + Liabilities, with a balance theme (e.g., horizontal or weighing diagram).",
            "is_critical": true,
            "metadata": {},
            "scorer": {
              "type": "function",
              "function_code": "def compute_score() -> tuple[str, float]:\n    \"\"\"Score whether the slide-3 SmartArt conveys a balance relationship.\n\n    Prefers a VLM verdict on the slide-3 screenshot. Falls back to keyword analysis of the\n    candidate shapes when the VLM does not answer yes, when the VLM call raises, or when no\n    screenshot is available. Reads the globals modified_ppt_path, modified_ppt_screenshots\n    and vlm_call, which are not defined in this snippet.\n    \"\"\"\n    from pptx import Presentation\n\n    NAME_HINTS = ('diagram', 'smartart', 'chart', 'graphic')\n    BALANCE_KEYWORDS = ('balance', 'weigh', 'scales', '=', 'equation', 'assets', 'capital', 'liabilities')\n\n    def shape_name(shape) -> str:\n        return (getattr(shape, 'name', '') or '').lower()\n\n    def is_candidate(shape) -> bool:\n        # A shape counts as SmartArt-like by name hint or by having no classified shape type.\n        if any(hint in shape_name(shape) for hint in NAME_HINTS):\n            return True\n        try:\n            return shape.shape_type is None\n        except NotImplementedError:\n            # python-pptx raises this for shape kinds it cannot classify\n            return False\n\n    def has_balance_text(shape, allow_name_match: bool) -> bool:\n        # True when the shape text holds at least two balance keywords, or (optionally)\n        # when the shape name itself mentions balance.\n        text = ''\n        if getattr(shape, 'has_text_frame', False):\n            text = shape.text_frame.text.lower()\n        hits = sum(1 for keyword in BALANCE_KEYWORDS if keyword in text)\n        return hits >= 2 or (allow_name_match and 'balance' in shape_name(shape))\n\n    ppt = Presentation(modified_ppt_path)\n    if len(ppt.slides) <= 2:\n        return \"Slide 3 does not exist.\", 0.0\n\n    candidates = [shape for shape in ppt.slides[2].shapes if is_candidate(shape)]\n    if not candidates:\n        return \"No SmartArt/Diagram present on slide 3.\", 0.0\n\n    # First screenshot entry recorded for slide 3, if any\n    screenshot = next((s.image_path for s in modified_ppt_screenshots if s.slide_number == 3), None)\n\n    if not screenshot:\n        # No screenshot available, fall back to text analysis\n        if any(has_balance_text(shape, True) for shape in candidates):\n            return \"SmartArt likely reflects balance relationship (no screenshot available for VLM).\", 0.8\n        return \"SmartArt structure appears suitable for representing balance (no screenshot available).\", 0.6\n\n    vlm_prompt = \"\"\"Look at this PowerPoint slide image. Does it contain a visual representation of a balance, scale, or weighing device that could represent the accounting equation concept of balance between assets and liabilities/equity?\n\nLook for:\n- Scale/balance imagery (traditional weighing scales)\n- Seesaw or balance beam representations  \n- Any visual metaphor showing balance or equilibrium\n- Diagrams showing equal sides or balanced elements\n\nAnswer with 'YES' if you can see clear balance/scale imagery that represents the accounting equation balance concept, or 'NO' if there is no such visual representation.\"\"\"\n\n    try:\n        vlm_response = vlm_call(vlm_prompt, [screenshot], temperature=0.2, max_tokens=50)\n        confirmed = bool(vlm_response) and 'yes' in vlm_response.lower()\n    except Exception:\n        # VLM failure: degrade to text analysis rather than failing the node\n        if any(has_balance_text(shape, True) for shape in candidates):\n            return \"SmartArt likely reflects balance relationship (VLM check failed, used text analysis).\", 0.8\n        return \"SmartArt structure appears suitable for representing balance (VLM check failed).\", 0.6\n\n    if confirmed:\n        return \"VLM confirms SmartArt contains balance/scale imagery representing the accounting equation balance.\", 1.0\n    # VLM saw no balance imagery: credit balance-related text only (the shape name is not considered here)\n    if any(has_balance_text(shape, False) for shape in candidates):\n        return \"SmartArt contains balance-related text but no clear visual balance imagery detected by VLM.\", 0.7\n    return \"SmartArt/Diagram present but VLM confirms no clear balance/scale imagery or sufficient balance-related content.\", 0.3\n"
            }
          }
        ]
      },
      {
        "name": "Slide 3 layout and extraneous objects",
        "description": "Checks slide 3 for unnecessary changes and presence of extraneous visual elements unrelated to the task.",
        "is_critical": false,
        "metadata": {},
        "children": [
          {
            "name": "No unnecessary graphical or text elements added",
            "description": "Verifies that slide 3 does not contain added shapes, images, or text unrelated to the accounting equation balance SmartArt.",
            "is_critical": false,
            "metadata": {},
            "scorer": {
              "type": "function",
              "function_code": "def compute_score() -> tuple[str, float]:\n    \"\"\"Score whether slide 3 gained only the expected SmartArt/diagram shape.\n\n    Compares the shapes on slide 3 of the original and modified presentations.\n    Reads the globals original_ppt_path and modified_ppt_path, which are not defined\n    in this snippet.\n    \"\"\"\n    from pptx import Presentation\n\n    def is_diagram(shape) -> bool:\n        if 'diagram' in (getattr(shape, 'name', '') or '').lower():\n            return True\n        try:\n            return shape.shape_type is None\n        except NotImplementedError:\n            # python-pptx raises this for shape kinds it cannot classify\n            return False\n\n    slide_idx = 2\n    ppt_orig = Presentation(original_ppt_path)\n    ppt_mod = Presentation(modified_ppt_path)\n    if len(ppt_orig.slides) <= slide_idx or len(ppt_mod.slides) <= slide_idx:\n        return \"Slide 3 missing in one of presentations.\", 0.0\n\n    orig_shapes = list(ppt_orig.slides[slide_idx].shapes)\n    mod_shapes = list(ppt_mod.slides[slide_idx].shapes)\n    added = len(mod_shapes) - len(orig_shapes)\n\n    if added == 1:\n        # The extra shape is a diagram only if the slide now holds more diagrams than before.\n        # Scanning every shape on the modified slide would let a pre-existing diagram mask\n        # an unrelated addition.\n        orig_diagrams = sum(1 for shape in orig_shapes if is_diagram(shape))\n        mod_diagrams = sum(1 for shape in mod_shapes if is_diagram(shape))\n        if mod_diagrams > orig_diagrams:\n            return \"Only SmartArt/Diagram added to slide - no extraneous elements.\", 1.0\n        return \"Non-SmartArt shape added to slide.\", 0.5\n    if added == 0:\n        return \"No new shapes added to slide.\", 1.0\n    if added > 1:\n        return f\"Multiple shapes added ({added}), may include extraneous elements.\", 0.0\n    return \"Shapes were removed from slide.\", 0.8\n"
            }
          }
        ]
      },
      {
        "name": "Other slides unchanged",
        "description": "Verifies that other slides were not modified (no slide insertions, deletions, or unrelated edits to other slides).",
        "is_critical": false,
        "metadata": {},
        "scorer": {
          "type": "function",
          "function_code": "def compute_score() -> tuple[str, float]:\n    \"\"\"Return 1.0 only when no slide was added or removed and every modified_slides entry belongs to slide 3.\n\n    Reads the global ppt_diff, which is not defined in this snippet; the slide number is\n    taken from the first element of each modified_slides entry.\n    \"\"\"\n    slide_set_changed = bool(ppt_diff.added_slides or ppt_diff.removed_slides)\n    if slide_set_changed:\n        return \"Slides have been added or removed.\", 0.0\n\n    touched_elsewhere = any(entry[0].slide_number != 3 for entry in ppt_diff.modified_slides)\n    if touched_elsewhere:\n        return \"Other slides were edited.\", 0.0\n\n    return \"No changes to other slides.\", 1.0\n"
        }
      }
    ]
  },
  "metadata": {
    "task": "Insert a SmartArt graphic on slide 3 to represent the accounting equation (Assets = Capital + Liabilities) using a balance"
  }
}