"""
Custom template filters for question rendering.

Provides filters for:
- Markdown and LaTeX rendering with MathJax compatibility
- Dictionary and attribute access in templates
- String manipulation utilities
"""

import markdown
from django import template
from django.utils.safestring import mark_safe

# Library instance that collects this module's template filters; the
# definitions below attach themselves to it through ``register.filter``.
register = template.Library()

@register.filter
def split(value, delimiter=','):
    """Break ``value`` into a list of pieces separated by ``delimiter``.

    Falsy input (empty string, ``None``, ...) yields an empty list instead
    of being split.
    """
    return value.split(delimiter) if value else []


@register.filter
def get_item(dictionary, key):
    """Look up ``key`` in ``dictionary``.

    Returns the stored value, or ``None`` when the key is absent or when
    ``dictionary`` is not a ``dict`` instance.
    """
    # Anything that is not a real dict is treated as "no value".
    if not isinstance(dictionary, dict):
        return None
    return dictionary.get(key)


@register.filter
def get_attr(obj, attr_name):
    """Return the attribute ``attr_name`` of ``obj``, or ``None`` if it is missing.

    The attribute is resolved with a single ``getattr`` call, so properties
    and ``__getattr__`` hooks run exactly once per lookup and there is no gap
    between checking for the attribute and reading it.
    """
    return getattr(obj, attr_name, None)


def _add_markdown_attribute_to_html_tags(value):
    """
    Tag common block-level HTML elements with markdown="1".

    Shared helper for markdown_latex and markdown_latex_reviews. The text is
    returned untouched unless it contains at least one markdown link of the
    form [text](url); when it does, every opening p/div/li/ol/ul/blockquote/
    article/section/aside/td/th tag that has no ``markdown=`` attribute yet
    gets ``markdown="1"`` appended to its attribute list.
    """
    import re

    # No markdown link -> nothing inside the HTML needs markdown processing.
    if re.search(r'\[([^\]]+)\]\(([^)]+)\)', value) is None:
        return value

    def _tag_with_markdown(found):
        tag_name = found.group(1)
        existing_attrs = found.group(2) or ''
        # Leave tags alone when they already carry a markdown attribute.
        if 'markdown=' in existing_attrs:
            return found.group(0)
        return f'<{tag_name}{existing_attrs} markdown="1">'

    return re.sub(
        r'<(p|div|li|ol|ul|blockquote|article|section|aside|td|th)(\s[^>]*)?>',
        _tag_with_markdown,
        value,
    )


def markdown_latex(value):
    """
    Render markdown (optionally mixed with HTML) to HTML, keeping LaTeX intact for MathJax.

    Used as a Django template filter. Also accepts full HTML documents such as
    those returned by AI responses.

    Processing steps, in order:
    1. Strip a wrapping code fence (``` or ```markdown) and wrapping ''' / r'''
       delimiters that models sometimes add.
    2. Swap every $$...$$ block for a placeholder so markdown never touches it.
    3. If the text is a full HTML document, keep only the <body> content (or,
       when no <body>...</body> pair is found, drop the DOCTYPE/html/head/body
       tags and keep the rest).
    4. When the text looks like HTML, remove the leading whitespace of every
       line so indented markup is not parsed as a markdown code block.
    5. Add markdown="1" to HTML tags via _add_markdown_attribute_to_html_tags.
    6. Swap every inline $...$ span for a placeholder.
    7. Convert with python-markdown, then restore the placeholders. Display
       math is emitted as a MathJax preview span plus a ``math/tex`` script.

    Args:
        value: Markdown/HTML source text. Falsy values short-circuit.

    Returns:
        ``""`` for falsy input, otherwise the rendered HTML passed through
        ``mark_safe``.

    NOTE(security): the result is marked safe without any HTML sanitization,
    and math spans are re-inserted after markdown conversion. Only feed this
    filter content that is trusted or has been sanitized elsewhere.
    """
    if not value:
        return ""

    import logging
    import re

    logger = logging.getLogger(__name__)

    # Strip common code block delimiters that models sometimes add
    value = value.strip()

    # Remove a wrapping markdown code fence
    if value.startswith('```markdown') and value.endswith('```'):
        value = value[11:-3].strip()  # drop leading ```markdown and trailing ```
    elif value.startswith('```') and value.endswith('```'):
        value = value[3:-3].strip()   # drop leading and trailing ```

    # Remove wrapping Python (raw) triple-quote delimiters
    if value.startswith("r'''") and value.endswith("'''"):
        value = value[4:-3].strip()   # drop leading r''' and trailing '''
    elif value.startswith("'''") and value.endswith("'''"):
        value = value[3:-3].strip()   # drop leading and trailing '''

    def sanitize_latex(latex_content):
        """
        Clean one math span and return the cleaned string.

        Removes the ``mathvariant`` "double-struck" attribute text (single- or
        double-quoted) and logs a warning when braces or brackets are
        unbalanced by more than 3. The content is never altered because of
        unbalanced delimiters; that case is only reported.
        """
        latex_content = latex_content.replace("mathvariant='double-struck'", "")
        latex_content = latex_content.replace('mathvariant="double-struck"', '')

        open_braces = latex_content.count('{')
        close_braces = latex_content.count('}')
        open_brackets = latex_content.count('[')
        close_brackets = latex_content.count(']')

        # Report, but do not modify: rendering errors are left to MathJax.
        if abs(open_braces - close_braces) > 3 or abs(open_brackets - close_brackets) > 3:
            logger.warning(
                "Severely unmatched delimiters in LaTeX (braces: %d/%d, brackets: %d/%d)",
                open_braces, close_braces, open_brackets, close_brackets,
            )

        return latex_content

    # Protect display math first so that $...$ inside $$...$$ is not picked up
    # by the inline-math pass further down.
    display_blocks = []

    def store_display_block(match):
        idx = len(display_blocks)
        display_blocks.append(sanitize_latex(match.group(0)))
        return f'DISPLAYMATHBLOCK{idx}DISPLAYMATHBLOCK'

    # [\s\S] lets a display block span multiple lines
    value = re.sub(r'\$\$[\s\S]*?\$\$', store_display_block, value)

    # Full HTML document (e.g. from AI responses)? Detection is
    # case-insensitive, matching the case-insensitive extraction below, and
    # also accepts doctypes that carry extra tokens after "html".
    value_stripped = value.strip()
    if re.match(r'<!DOCTYPE\s+html\b|<html', value_stripped, re.IGNORECASE):
        body_match = re.search(r'<body[^>]*>(.*?)</body>', value_stripped, re.DOTALL | re.IGNORECASE)
        if body_match:
            body_content = body_match.group(1)
            # Remove any <title> element that ended up inside the body
            body_content = re.sub(r'<title[^>]*>.*?</title>', '', body_content, flags=re.DOTALL | re.IGNORECASE)
            # Not stripped yet: per-line dedenting happens below
            value = body_content
        else:
            # Fallback: remove DOCTYPE and html/head/body tags but keep content
            value = re.sub(r'<!DOCTYPE[^>]*>', '', value_stripped, flags=re.IGNORECASE)
            value = re.sub(r'<html[^>]*>', '', value, flags=re.IGNORECASE)
            value = re.sub(r'</html>', '', value, flags=re.IGNORECASE)
            value = re.sub(r'<head[^>]*>.*?</head>', '', value, flags=re.DOTALL | re.IGNORECASE)
            value = re.sub(r'<body[^>]*>', '', value, flags=re.IGNORECASE)
            value = re.sub(r'</body>', '', value, flags=re.IGNORECASE)

    # Remove ALL leading whitespace from every line when the text looks like
    # HTML; otherwise markdown treats indented markup as code blocks. This is
    # deliberately aggressive because nested content can be indented too.
    if '<' in value and '>' in value:
        value = '\n'.join(line.lstrip() for line in value.split('\n'))

    # Now strip leading/trailing whitespace
    value = value.strip()

    # Add markdown="1" attribute to HTML tags if markdown syntax is detected
    value = _add_markdown_attribute_to_html_tags(value)

    # Protect (and sanitize) inline $...$ spans before markdown processing
    inline_math_blocks = []

    def store_inline_math(match):
        idx = len(inline_math_blocks)
        inline_math_blocks.append(sanitize_latex(match.group(0)))
        return f'INLINEMATHBLOCK{idx}INLINEMATHBLOCK'

    # Matches $...$ but not $$...$$ (display math is already a placeholder)
    value = re.sub(r'(?<!\$)\$(?!\$)([^\$]+?)\$(?!\$)', store_inline_math, value)

    # Configure markdown with math extension and proper list processing
    md = markdown.Markdown(extensions=[
        'mdx_math',                       # Math extension for LaTeX (process first)
        'markdown.extensions.md_in_html', # Process markdown inside HTML tags
        'markdown.extensions.extra',      # Tables, footnotes, abbreviations, def_list
        'markdown.extensions.sane_lists', # Better list processing
        # codehilite is intentionally absent - it caused indented HTML to be treated as code blocks
    ], extension_configs={
        'mdx_math': {
            'enable_dollar_delimiter': True,  # Enable $...$ syntax
            'add_preview': True,             # Add preview for better UX
        }
    })

    html_content = md.convert(value)

    # Restore inline math blocks verbatim
    for idx, block in enumerate(inline_math_blocks):
        html_content = html_content.replace(f'INLINEMATHBLOCK{idx}INLINEMATHBLOCK', block)

    # Restore display math blocks and convert them to MathJax format
    for idx, block in enumerate(display_blocks):
        math_content = block[2:-2].strip()  # Remove $$ delimiters
        # The preview is ordinary HTML text, so angle brackets in the LaTeX
        # must be escaped or they are parsed as markup. '&' is left as is so
        # entities already present in HTML-sourced input are not double-escaped.
        preview_text = block.replace('<', '&lt;').replace('>', '&gt;')
        mathjax_html = f'\n<div><span class="MathJax_Preview">{preview_text}</span><script type="math/tex; mode=display">{math_content}</script>\n</div>\n'
        html_content = html_content.replace(f'DISPLAYMATHBLOCK{idx}DISPLAYMATHBLOCK', mathjax_html)

    return mark_safe(html_content)


def markdown_latex_reviews(value):
    """
    Render review / review-reply text to HTML.

    Variant of markdown_latex that enables the nl2br extension so single
    newlines in a comment become <br> tags. Falsy input returns "". The
    result is passed through mark_safe.
    """
    if not value:
        return ""

    # Let markdown inside HTML tags be processed when markdown links are present
    prepared = _add_markdown_attribute_to_html_tags(value)

    # Extension order is kept as-is; md_in_html is listed first on purpose.
    # codehilite is left out because it treated indented HTML as code blocks.
    enabled_extensions = [
        'markdown.extensions.md_in_html',  # markdown inside HTML tags
        'mdx_math',                        # LaTeX math
        'markdown.extensions.extra',       # tables, footnotes, abbreviations, def_list
        'markdown.extensions.sane_lists',  # stricter list handling
        'markdown.extensions.nl2br',       # newlines -> <br> (for reviews)
    ]
    math_options = {
        'enable_dollar_delimiter': True,   # allow $...$ syntax
        'add_preview': True,               # emit a preview node
    }
    renderer = markdown.Markdown(
        extensions=enabled_extensions,
        extension_configs={'mdx_math': math_options},
    )

    return mark_safe(renderer.convert(prepared))


# Expose the two renderers to templates under their own function names.
register.filter(name='markdown_latex', filter_func=markdown_latex)
register.filter(name='markdown_latex_reviews', filter_func=markdown_latex_reviews)