{
  "root": {
    "name": "Change font size of main title on first slide to make it larger",
    "description": "Evaluates whether the agent successfully enlarged the font size of the main title 'Texts and Human Experiences' on the first slide, without introducing any extraneous changes.",
    "is_critical": true,
    "metadata": {},
    "children": [
      {
        "name": "Font size of main title is increased on first slide",
        "description": "Checks that the font size of the main title 'Texts and Human Experiences' on the first slide was increased.",
        "is_critical": true,
        "metadata": {},
        "scorer": {
          "type": "function",
          "function_code": "def compute_score() -> tuple[str, float]:\n    from pptx import Presentation\n    from pptx.enum.shapes import PP_PLACEHOLDER\n    from pptx.oxml.ns import qn\n    import difflib\n    import os\n    \n    title_text = \"Texts and Human Experiences\"\n    \n    # Use globally-defined paths; ensure they exist\n    global original_ppt_path, modified_ppt_path  # noqa: F821\n    try:  # runtime guard\n        _ = original_ppt_path  # noqa: F821\n        _ = modified_ppt_path  # noqa: F821\n    except NameError:\n        return \"Global original_ppt_path / modified_ppt_path not defined in runtime scope.\", 0.0\n\n    # Load original and modified presentations\n    orig_prs = Presentation(original_ppt_path)\n    mod_prs = Presentation(modified_ppt_path)\n    # Get first slide from both\n    orig_slide = orig_prs.slides[0]\n    mod_slide = mod_prs.slides[0]\n    \n    def find_title_shape(slide):\n        phrase = title_text.strip().lower()\n        candidates = []\n        for idx, shape in enumerate(slide.shapes):\n            if not getattr(shape, 'has_text_frame', False):\n                continue\n            try:\n                raw_text = shape.text_frame.text or \"\"\n            except Exception:\n                continue\n            text_lower = raw_text.lower()\n            contains_phrase = phrase in text_lower\n            tokens = [t for t in phrase.split() if t]\n            token_overlap = sum(1 for t in tokens if t in text_lower)\n            placeholder_rank = 0\n            ph_type = None\n            if getattr(shape, 'is_placeholder', False):\n                try:\n                    ph_type = shape.placeholder_format.type\n                    if ph_type in (PP_PLACEHOLDER.TITLE, getattr(PP_PLACEHOLDER, 'CENTER_TITLE', None)):\n                        placeholder_rank = 3\n                    else:\n                        placeholder_rank = 1\n                except Exception:\n                    pass\n            # Heuristic score\n            score = (10 if contains_phrase else 0) + placeholder_rank*5 + token_overlap + len(raw_text)/120.0\n            candidates.append((score, idx, shape, raw_text[:120].replace('\\n',' / ')))\n        if not candidates:\n            return None\n        candidates.sort(key=lambda x: x[0], reverse=True)\n        if os.environ.get(\"DEBUG_FONT\") == \"1\":\n            for sc, i, sh, snip in candidates:\n                try:\n                    print(f\"[FIND] score={sc:.2f} idx={i} placeholder={getattr(sh.placeholder_format,'type',None) if getattr(sh,'is_placeholder',False) else None} text='{snip}'\")\n                except Exception:\n                    pass\n        return candidates[0][2]\n\n    orig_title = find_title_shape(orig_slide)\n    mod_title = find_title_shape(mod_slide)\n    if orig_title is None or mod_title is None:\n        return \"Could not locate the main title in one or both presentations.\", 0.0\n\n    # Inheritance-aware effective font size resolution.\n    # PowerPoint font size precedence roughly:\n    #   Run rPr.sz > Paragraph a:pPr/a:defRPr.sz > Layout text style > Master text style.\n    # python-pptx only exposes explicitly-set sizes; inherited ones stay None, so we need XML inspection.\n    NS = {\"p\": \"http://schemas.openxmlformats.org/presentationml/2006/main\",\n          \"a\": \"http://schemas.openxmlformats.org/drawingml/2006/main\"}\n\n    def _presentation_default_style_size(shape, level):\n        \"\"\"Search presentation-level defaultTextStyle for a size (rare but possible).\"\"\"\n        try:\n            pres_el = shape.part.package.presentation_part.element\n        except Exception:\n            return None\n        # lvl is 1-based\n        paths = [\n            f\"./p:defaultTextStyle/a:lvl{level}pPr/a:defRPr/@sz\",\n            f\"./p:defaultTextStyle/a:defPPr/a:defRPr/@sz\",\n        ]\n        for xp in paths:\n            try:\n                vals = pres_el.xpath(xp, namespaces=NS)\n            except TypeError:\n                vals = pres_el.xpath(xp)\n            if vals:\n                try:\n                    return int(vals[0]) / 100.0\n                except ValueError:\n                    return None\n        return None\n\n    def _run_explicit_size(run):\n        rPr = run._r.rPr  # do NOT auto-create\n        if rPr is not None:\n            sz = rPr.get(\"sz\")  # value stored in 1/100 points\n            if sz:\n                return int(sz)/100.0\n        return None\n\n    def _para_default_size(para):\n        pPr = para._p.pPr\n        if pPr is not None:\n            defRPr = pPr.find(qn('a:defRPr'))\n            if defRPr is not None:\n                sz = defRPr.get('sz')\n                if sz:\n                    return int(sz)/100.0\n        return None\n\n    def _para_end_size(para):\n        \"\"\"Size specified on a:endParaRPr (sometimes used).\"\"\"\n        pPr = para._p.pPr\n        if pPr is not None:\n            endRPr = pPr.find(qn('a:endParaRPr'))\n            if endRPr is not None:\n                sz = endRPr.get('sz')\n                if sz:\n                    try:\n                        return int(sz)/100.0\n                    except ValueError:\n                        return None\n        return None\n\n    def _shape_lst_style_size(shape, level):\n        \"\"\"Check shape's own a:lstStyle hierarchy for size (not previously queried).\"\"\"\n        try:\n            txBody = shape.text_frame._txBody  # oxml element\n        except Exception:\n            return None\n        if txBody is None:\n            return None\n        # 1. Level-specific\n        xp_level = f\"./a:lstStyle/a:lvl{level}pPr/a:defRPr/@sz\"\n        # 2. Fallback default paragraph formatting in lstStyle\n        xp_default = \"./a:lstStyle/a:defPPr/a:defRPr/@sz\"\n        for xp in (xp_level, xp_default):\n            try:\n                vals = txBody.xpath(xp, namespaces=NS)\n            except TypeError:\n                vals = txBody.xpath(xp)\n            if vals:\n                try:\n                    return int(vals[0]) / 100.0\n                except ValueError:\n                    return None\n        return None\n\n    def _style_size(shape, level):\n        # level: 1-based paragraph level (para.level is 0-based)\n        slide_layout = shape.part.slide_layout if hasattr(shape.part, 'slide_layout') else None\n        if slide_layout is None:\n            return None\n        layout_el = slide_layout.element\n        master_el = slide_layout.part.slide_master.element\n\n        def pick(container_el, style_tag):\n            # XPath for lvlNpPr where N=level\n            xp = f\"./p:{style_tag}/a:lvl{level}pPr/a:defRPr/@sz\"\n            try:\n                vals = container_el.xpath(xp, namespaces=NS)  # newer python-pptx / lxml style\n            except TypeError:\n                # Some versions expose xpath() wrapper without namespaces kw\n                vals = container_el.xpath(xp)\n            # Fallback: namespace-insensitive search if nothing found\n            if not vals:\n                alt = (f\".//*[local-name()='{style_tag}']/*[local-name()='lvl{level}pPr']\" \\\n                       f\"/*[local-name()='defRPr']/@sz\")\n                try:\n                    vals = container_el.xpath(alt)\n                except Exception:\n                    pass\n            if vals:\n                try:\n                    return int(vals[0]) / 100.0\n                except ValueError:\n                    return None\n            # Default paragraph style in style_tag (defPPr)\n            xp_def = f\"./p:{style_tag}/a:defPPr/a:defRPr/@sz\"\n            try:\n                vals2 = container_el.xpath(xp_def, namespaces=NS)\n            except TypeError:\n                vals2 = container_el.xpath(xp_def)\n            if not vals2:\n                alt_def = (f\".//*[local-name()='{style_tag}']/*[local-name()='defPPr']\" \\\n                           f\"/*[local-name()='defRPr']/@sz\")\n                try:\n                    vals2 = container_el.xpath(alt_def)\n                except Exception:\n                    vals2 = []\n            if vals2:\n                try:\n                    return int(vals2[0]) / 100.0\n                except ValueError:\n                    return None\n            return None\n\n        # Decide probable style set (title vs body). Could be expanded for ctrTitle, etc.\n        style_tags = [\"bodyStyle\"]\n        if getattr(shape, 'is_placeholder', False):\n            try:\n                ph_type = shape.placeholder_format.type\n                # Treat both TITLE and CENTER_TITLE as titleStyle\n                if ph_type in (PP_PLACEHOLDER.TITLE, getattr(PP_PLACEHOLDER, 'CENTER_TITLE', None)):\n                    style_tags = [\"titleStyle\", \"bodyStyle\"]  # prefer title style\n            except Exception:\n                pass\n\n        for tag in style_tags:\n            for container in (layout_el, master_el):\n                sz = pick(container, tag)\n                if sz is not None:\n                    return sz\n        return None\n\n    def _autofit_scale(shape):\n        \"\"\"Return fontScale factor if normAutofit present, else 1.0\"\"\"\n        try:\n            txBody = shape.text_frame._txBody\n            try:\n                els = txBody.xpath('./a:bodyPr/a:normAutofit', namespaces=NS)\n            except TypeError:\n                els = txBody.xpath('./a:bodyPr/a:normAutofit')\n            if els:\n                el = els[0]\n                val = el.get('fontScale')\n                if val:\n                    # spec: thousandths of a percent, so 100% == 100000\n                    return int(val)/100000.0\n        except Exception:\n            pass\n        return 1.0\n\n    def get_text_run_details(shape):\n        \"\"\"Get detailed information about each text run with its font size and text content.\"\"\"\n        if not shape.has_text_frame:\n            return []\n        \n        text_runs = []\n        scale = _autofit_scale(shape)\n        \n        for para in shape.text_frame.paragraphs:\n            para_level = (para.level or 0) + 1\n            \n            # Handle paragraphs with runs\n            for run in para.runs:\n                text_content = run.text\n                if not text_content.strip():  # Skip empty runs\n                    continue\n                    \n                sz = (_run_explicit_size(run) or\n                      _para_default_size(para) or\n                      _para_end_size(para) or\n                      _shape_lst_style_size(shape, para_level) or\n                      _style_size(shape, para_level) or\n                      _presentation_default_style_size(shape, para_level))\n                \n                if sz:\n                    effective_size = sz * scale\n                    text_runs.append({\n                        'text': text_content,\n                        'size': effective_size,\n                        'char_count': len(text_content)\n                    })\n            \n            # Handle paragraphs with no runs but text\n            if not para.runs and para.text.strip():\n                sz = (_para_default_size(para) or\n                      _para_end_size(para) or\n                      _shape_lst_style_size(shape, para_level) or\n                      _style_size(shape, para_level) or\n                      _presentation_default_style_size(shape, para_level))\n                \n                if sz:\n                    effective_size = sz * scale\n                    text_runs.append({\n                        'text': para.text,\n                        'size': effective_size,\n                        'char_count': len(para.text)\n                    })\n        \n        return text_runs\n\n    def calculate_partial_score(orig_runs, mod_runs):\n        \"\"\"Calculate partial score based on which text segments had font size increases.\"\"\"\n        if not orig_runs or not mod_runs:\n            return 0.0, \"Could not extract text run details from one or both presentations.\"\n        \n        # Create a mapping of text content to font sizes for comparison\n        orig_text_sizes = {run['text']: run['size'] for run in orig_runs}\n        mod_text_sizes = {run['text']: run['size'] for run in mod_runs}\n        \n        # Calculate total character count for weighting\n        total_chars = sum(run['char_count'] for run in orig_runs)\n        increased_chars = 0\n        unchanged_chars = 0\n        decreased_chars = 0\n        \n        changes_detected = []\n        \n        # Check each text segment for font size changes\n        for text, orig_size in orig_text_sizes.items():\n            if text in mod_text_sizes:\n                mod_size = mod_text_sizes[text]\n                char_count = next((run['char_count'] for run in orig_runs if run['text'] == text), 0)\n                \n                if mod_size > orig_size + 0.1:  # Small tolerance for floating point comparison\n                    increased_chars += char_count\n                    changes_detected.append(f\"'{text}': {orig_size:.1f}pt → {mod_size:.1f}pt (increased)\")\n                elif mod_size < orig_size - 0.1:\n                    decreased_chars += char_count\n                    changes_detected.append(f\"'{text}': {orig_size:.1f}pt → {mod_size:.1f}pt (decreased)\")\n                else:\n                    unchanged_chars += char_count\n                    changes_detected.append(f\"'{text}': {orig_size:.1f}pt (unchanged)\")\n        \n        # Check for new text segments in modified version\n        for text, mod_size in mod_text_sizes.items():\n            if text not in orig_text_sizes:\n                char_count = next((run['char_count'] for run in mod_runs if run['text'] == text), 0)\n                changes_detected.append(f\"'{text}': new text at {mod_size:.1f}pt\")\n        \n        # Calculate score based on proportion of text with increased font size\n        if total_chars == 0:\n            return 0.0, \"No text content found to analyze.\"\n        \n        increase_ratio = increased_chars / total_chars\n        \n        # Determine score and reason\n        if increase_ratio >= 0.9:  # 90% or more of text increased\n            score = 1.0\n            reason = f\"Font size increased for {increase_ratio*100:.1f}% of text. Changes: {'; '.join(changes_detected)}\"\n        elif increase_ratio >= 0.5:  # 50-89% of text increased\n            score = 0.8\n            reason = f\"Font size increased for {increase_ratio*100:.1f}% of text (partial completion). Changes: {'; '.join(changes_detected)}\"\n        elif increase_ratio >= 0.25:  # 25-49% of text increased\n            score = 0.5\n            reason = f\"Font size increased for {increase_ratio*100:.1f}% of text (limited completion). Changes: {'; '.join(changes_detected)}\"\n        elif increase_ratio > 0:  # Some text increased\n            score = 0.3\n            reason = f\"Font size increased for {increase_ratio*100:.1f}% of text (minimal completion). Changes: {'; '.join(changes_detected)}\"\n        else:  # No text increased\n            if decreased_chars > 0:\n                score = 0.0\n                reason = f\"Font size was decreased rather than increased. Changes: {'; '.join(changes_detected)}\"\n            else:\n                score = 0.0\n                reason = f\"No font size increases detected. Changes: {'; '.join(changes_detected)}\"\n        \n        return score, reason\n\n    # Get detailed text run information from both presentations\n    orig_runs = get_text_run_details(orig_title)\n    mod_runs = get_text_run_details(mod_title)\n    \n    # Calculate partial score based on text-level changes\n    score, reason = calculate_partial_score(orig_runs, mod_runs)\n    \n    return reason, score\n"
        }
      },
      {
        "name": "Main title text unchanged except for font size",
        "description": "Checks that the text content of the main title was not modified except for font size.",
        "is_critical": false,
        "metadata": {},
        "scorer": {
          "type": "function",
          "function_code": "def compute_score() -> tuple[str, float]:\n    from pptx import Presentation\n    import difflib\n    title_text = \"Texts and Human Experiences\"\n    orig_prs = Presentation(original_ppt_path)\n    mod_prs = Presentation(modified_ppt_path)\n    orig_slide = orig_prs.slides[0]\n    mod_slide = mod_prs.slides[0]\n    def find_title_shape(slide):\n        for shape in slide.shapes:\n            if shape.has_text_frame:\n                if title_text.strip().lower() in shape.text.strip().lower():\n                    return shape\n        return None\n    orig_title = find_title_shape(orig_slide)\n    mod_title = find_title_shape(mod_slide)\n    if orig_title is None or mod_title is None:\n        return \"Could not locate the main title in one or both presentations.\", 0.0\n    orig_text = orig_title.text.strip()\n    mod_text = mod_title.text.strip()\n    # Use sequence matcher to check for changes\n    if orig_text == mod_text:\n        return \"Title text unchanged.\", 1.0\n    elif difflib.SequenceMatcher(None, orig_text, mod_text).ratio() > 0.95:\n        return \"Title text changed minimally (likely whitespace), partial credit.\", 0.5\n    else:\n        return f\"Title text changed from '{orig_text}' to '{mod_text}'.\", 0.0\n"
        }
      },
      {
        "name": "No extraneous changes made to other elements on first slide",
        "description": "Checks that only the main title was modified, and all other shapes/elements on the first slide remain unchanged.",
        "is_critical": false,
        "metadata": {},
        "scorer": {
          "type": "function",
          "function_code": "def compute_score() -> tuple[str, float]:\n    from pptx import Presentation\n    orig_prs = Presentation(original_ppt_path)\n    mod_prs = Presentation(modified_ppt_path)\n    orig_slide = orig_prs.slides[0]\n    mod_slide = mod_prs.slides[0]\n    title_text = \"Texts and Human Experiences\"\n    # Find main title shape index in both slides\n    def title_shape_idx(slide):\n        for i, shape in enumerate(slide.shapes):\n            if shape.has_text_frame:\n                if title_text.strip().lower() in shape.text.strip().lower():\n                    return i\n        return None\n    orig_title_idx = title_shape_idx(orig_slide)\n    mod_title_idx = title_shape_idx(mod_slide)\n    def shape_summary(shape):\n        if shape.has_text_frame:\n            return shape.text.strip()\n        if hasattr(shape, 'name'):\n            return shape.name\n        return str(type(shape))\n    # Compare all shapes except the title shape\n    unchanged = True\n    for i in range(len(orig_slide.shapes)):\n        if i == orig_title_idx:\n            continue\n        orig_shape_sum = shape_summary(orig_slide.shapes[i])\n        if i < len(mod_slide.shapes):\n            mod_shape_sum = shape_summary(mod_slide.shapes[i])\n        else:\n            unchanged = False\n            break\n        if orig_shape_sum != mod_shape_sum:\n            unchanged = False\n            break\n    if unchanged and len(orig_slide.shapes) == len(mod_slide.shapes):\n        return \"All other shapes on first slide unchanged.\", 1.0\n    else:\n        return \"Other shapes on first slide have been changed.\", 0.0\n"
        }
      },
      {
        "name": "No extraneous changes elsewhere in presentation",
        "description": "Checks that slides other than the first slide, and global presentation properties, were not altered.",
        "is_critical": false,
        "metadata": {},
        "scorer": {
          "type": "function",
          "function_code": "def compute_score() -> tuple[str, float]:\n    from pptx import Presentation\n    orig_prs = Presentation(original_ppt_path)\n    mod_prs = Presentation(modified_ppt_path)\n    # Ignore first slide\n    if len(orig_prs.slides) != len(mod_prs.slides):\n        return \"Number of slides changed.\", 0.0\n    unchanged = True\n    for i in range(1, len(orig_prs.slides)):\n        orig_slide = orig_prs.slides[i]\n        mod_slide = mod_prs.slides[i]\n        # Compare text content for all shapes\n        for j in range(len(orig_slide.shapes)):\n            orig_shape = orig_slide.shapes[j]\n            mod_shape = mod_slide.shapes[j] if j < len(mod_slide.shapes) else None\n            if mod_shape is None:\n                unchanged = False\n                break\n            if orig_shape.has_text_frame and mod_shape.has_text_frame:\n                if orig_shape.text.strip() != mod_shape.text.strip():\n                    unchanged = False\n                    break\n            # Optionally, compare shape types and count\n            if type(orig_shape) != type(mod_shape):\n                unchanged = False\n                break\n        if len(orig_slide.shapes) != len(mod_slide.shapes):\n            unchanged = False\n            break\n    if unchanged:\n        return \"No extraneous changes in other slides.\", 1.0\n    else:\n        return \"Other slides were modified.\", 0.0\n"
        }
      }
    ]
  },
  "metadata": {
    "task": "Change the font size of the main title 'Texts and Human Experiences' on the first slide to make it larger",
    "compute_strategy": "default",
    "critical_node_weight": 0.7
  }
}