{
  "root": {
    "name": "theme_change_with_color_preservation",
    "description": "Evaluates if a different design theme was applied to all slides while maintaining the greenish background color scheme.",
    "is_critical": true,
    "metadata": {},
    "children": [
      {
        "name": "theme_changed_entire_presentation",
        "description": "Checks if a different design theme was applied to all slides in the presentation.",
        "is_critical": true,
        "metadata": {},
        "scorer": {
          "type": "function",
          "function_code": "def compute_score() -> tuple[str, float]:\n    from pptx import Presentation\n    import re\n\n    # Load original and modified presentations\n    prs_orig = Presentation(original_ppt_path)\n    prs_mod = Presentation(modified_ppt_path)\n\n    def get_theme_signature(prs):\n        \"\"\"Collect indicators from the first slide master that identify its theme.\"\"\"\n        master_part = prs.slide_master.part\n        signature = {}\n\n        # Indicator 1: number of slide layouts\n        signature['layout_count'] = len(prs.slide_layouts)\n\n        # Indicator 2: sorted layout names\n        signature['layout_names'] = tuple(sorted(layout.name for layout in prs.slide_layouts))\n\n        # Indicator 3: slide master relationship count\n        signature['master_rel_count'] = len(master_part.rels)\n\n        # Indicator 4: name attribute of the theme part related to the master.\n        # Different themes can share layout names and counts, so the name is compared too.\n        theme_name = None\n        for rel in master_part.rels.values():\n            if rel.reltype.endswith('/theme') and not rel.is_external:\n                match = re.search(rb'<a:theme\\b[^>]*?\\bname=\"([^\"]*)\"', rel.target_part.blob)\n                if match:\n                    theme_name = match.group(1).decode('utf-8', 'replace')\n                break\n        signature['theme_name'] = theme_name\n\n        return signature\n\n    theme_sig_orig = get_theme_signature(prs_orig)\n    theme_sig_mod = get_theme_signature(prs_mod)\n\n    # Report only the indicators that actually differ; a theme name that could\n    # not be read on either side is skipped rather than counted as a change.\n    changed = [\n        key for key in ('layout_count', 'layout_names', 'master_rel_count', 'theme_name')\n        if theme_sig_orig[key] is not None\n        and theme_sig_mod[key] is not None\n        and theme_sig_orig[key] != theme_sig_mod[key]\n    ]\n\n    if not changed:\n        reason = f\"Theme unchanged. Layout count: {theme_sig_orig['layout_count']}, Layout names match: {theme_sig_orig['layout_names'][:3]}...\"\n        return reason, 0.0\n\n    reason = f\"Theme changed: Layout count {theme_sig_orig['layout_count']} → {theme_sig_mod['layout_count']}; differing indicators: {', '.join(changed)}.\"\n    return reason, 1.0\n"
        }
      },
      {
        "name": "color_scheme_preserved",
        "description": "Checks if the greenish background color scheme was preserved across all slides after theme change.",
        "is_critical": true,
        "metadata": {},
        "scorer": {
          "type": "function",
          "function_code": "def compute_score() -> tuple[str, float]:\n    import random\n    import re\n\n    def theme_changed() -> bool:\n        \"\"\"Return True when the first slide master's theme signature differs between the two decks.\"\"\"\n        from pptx import Presentation\n\n        def get_theme_signature(prs):\n            \"\"\"Collect indicators from the first slide master that identify its theme.\"\"\"\n            master_part = prs.slide_master.part\n            signature = {}\n            signature['layout_count'] = len(prs.slide_layouts)\n            signature['layout_names'] = tuple(sorted(layout.name for layout in prs.slide_layouts))\n            signature['master_rel_count'] = len(master_part.rels)\n\n            # Different themes can share layout names and counts, so the theme\n            # part's name attribute is compared too.\n            theme_name = None\n            for rel in master_part.rels.values():\n                if rel.reltype.endswith('/theme') and not rel.is_external:\n                    match = re.search(rb'<a:theme\\b[^>]*?\\bname=\"([^\"]*)\"', rel.target_part.blob)\n                    if match:\n                        theme_name = match.group(1).decode('utf-8', 'replace')\n                    break\n            signature['theme_name'] = theme_name\n            return signature\n\n        theme_sig_orig = get_theme_signature(Presentation(original_ppt_path))\n        theme_sig_mod = get_theme_signature(Presentation(modified_ppt_path))\n\n        # A value that could not be read on either side is skipped rather than counted as a change.\n        return any(\n            theme_sig_orig[key] is not None\n            and theme_sig_mod[key] is not None\n            and theme_sig_orig[key] != theme_sig_mod[key]\n            for key in theme_sig_orig\n        )\n\n    if not theme_changed():\n        return \"Theme unchanged; skipping color check.\", 0.0\n\n    # If no slides were modified, return 0.0 as requested\n    try:\n        modified_pairs = getattr(ppt_diff, 'modified_slides', [])\n    except Exception:\n        modified_pairs = []\n\n    if not modified_pairs:\n        return \"No slides were modified; skipping color check.\", 0.0\n\n    # Collect unique modified slide numbers, preserving diff order\n    modified_slide_numbers = []\n    seen = set()\n    for pair in modified_pairs:\n        try:\n            # pair can be (orig, mod)\n            s_num = getattr(pair[1], 'slide_number', None)\n            if s_num is not None and s_num not in seen:\n                seen.add(s_num)\n                modified_slide_numbers.append(s_num)\n        except Exception:\n            continue\n\n    if not modified_slide_numbers:\n        return \"No identifiable modified slide numbers; skipping color check.\", 0.0\n\n    def _get_image_path(s):\n        try:\n            path = getattr(s, 'image_path', None)\n        except Exception:\n            path = None\n        if not path and isinstance(s, dict):\n            path = s.get('image_path') or s.get('path')\n        return path\n\n    # Index screenshot paths by slide number; the first usable screenshot per number wins\n    screenshot_paths = {}\n    for s in modified_ppt_screenshots:\n        try:\n            s_num = getattr(s, 'slide_number', None)\n            if s_num is None and isinstance(s, dict):\n                s_num = s.get('slide_number')\n            path = _get_image_path(s)\n            if s_num is not None and path and s_num not in screenshot_paths:\n                screenshot_paths[s_num] = path\n        except Exception:\n            continue\n\n    # Only slides that have a screenshot are eligible, so missing images do not shrink the sample\n    candidates = [num for num in modified_slide_numbers if num in screenshot_paths]\n    if not candidates:\n        return \"No screenshots found for modified slides; cannot check color scheme.\", 0.0\n\n    # Sample up to 4 slides with a fixed seed so repeated runs score the same slides\n    sampled_numbers = random.Random(0).sample(candidates, min(4, len(candidates)))\n    images = [screenshot_paths[num] for num in sampled_numbers]\n\n    # Single VLM call for all sampled images\n    prompt = (\n        \"For each image in order, does the slide have a greenish background? \"\n        \"Shades of blue-green are also fine. Respond with exactly one line per image (delimited by a new line): \"\n        \"'YES' or 'NO' only (no extra text).\"\n    )\n\n    try:\n        response = vlm_call(images=images, prompt=prompt, temperature=0.1)\n        text = str(response).strip()\n\n        # Parse up to len(images) YES/NO tokens; missing answers count as 'no'\n        tokens = [t.lower() for t in re.findall(r\"\\b(yes|no)\\b\", text, flags=re.IGNORECASE)][: len(images)]\n        tokens += ['no'] * (len(images) - len(tokens))\n\n        greenish_count = tokens.count('yes')\n        total = len(images)\n        ratio = greenish_count / total if total else 0.0\n\n        if ratio > 0.9:\n            return f\"{greenish_count}/{total} modified slides preserve greenish backgrounds (sampled).\", 1.0\n        elif ratio > 0.7:\n            return f\"{greenish_count}/{total} modified slides preserve greenish backgrounds (sampled).\", 0.7\n        else:\n            return f\"Only {greenish_count}/{total} modified slides preserve greenish backgrounds (sampled).\", 0.0\n    except Exception as e:\n        return f\"Error during VLM color check: {str(e)}\", 0.0\n"
        }
      }
    ]
  },
  "metadata": {
    "task": "Apply a different design theme to the entire presentation while maintaining the current color scheme of greenish backgrounds"
  }
}