#!/usr/bin/env python3
"""
Universal Message Validation and Cleaning Framework - Prevents empty message content and other common API errors
"""

import re
import logging
from typing import List, Dict, Any, Optional, Union
from dataclasses import dataclass

@dataclass
class MessageValidationResult:
    """Outcome of validating (and possibly cleaning) a piece of message content."""
    # True when the content passed every check.
    is_valid: bool
    # Cleaned payload on success: a string, a dict, or a list, mirroring the
    # type of the validated input. Left as None on failure.
    cleaned_content: Optional[Union[str, Dict, List]] = None
    # Short machine-readable failure code (e.g. "empty_content"); None on success.
    error_type: Optional[str] = None
    # Human-readable failure description; None on success.
    error_details: Optional[str] = None
    # Names of the fixes that changed the content. Normalised to a list in
    # __post_init__, so it is never None after construction.
    fixes_applied: Optional[List[str]] = None

    def __post_init__(self):
        """Replace a missing ``fixes_applied`` with a fresh empty list."""
        # A per-instance list (not a shared default) keeps results independent
        # and lets callers iterate or take len() without a None check.
        if self.fixes_applied is None:
            self.fixes_applied = []


class UniversalMessageValidator:
    """Universal message validator - solves various message-related API errors.

    Content is cleaned with a fixed sequence of text fixes and then rejected
    if it is empty, matches a placeholder/meaningless pattern, or is shorter
    than ``min_length`` characters.
    """

    def __init__(self, min_length: int = 3):
        """Set up the logger, the rejection patterns and the cleaning fixes.

        Args:
            min_length: Minimum number of characters (after stripping) that
                cleaned string content must have to be considered valid.
        """
        self.logger = logging.getLogger(__name__)
        self.min_length = min_length

        # Common empty content patterns
        self.empty_patterns = [
            r'^\s*$',                    # Completely blank
            r'^[\s\n\r\t]*$',           # Only whitespace characters
            r'^None$',                   # None string
            r'^null$',                   # null string
            r'^undefined$',              # undefined string
            r'^""$',                     # Empty quotes
            r"^''$",                     # Empty single quotes
        ]

        # Meaningless content patterns
        self.meaningless_patterns = [
            r'^\.+$',                    # Only dots
            r'^-+$',                     # Only dashes
            r'^_+$',                     # Only underscores
            r'^[\.\-_\s]{1,10}$',       # Short symbol combinations
        ]

        # Common error fix mappings, applied in insertion order.
        self.content_fixes = {
            'empty_think_tag': lambda s: s.replace('<think></think>', '').replace('<think>\n</think>', ''),
            'empty_answer_tag': lambda s: s.replace('<answer></answer>', '').replace('<answer>\n</answer>', ''),
            # Entities are decoded before the whitespace fixes so that spaces
            # produced by '&nbsp;' get collapsed/trimmed too. '&amp;' is decoded
            # last so that '&amp;lt;' becomes '&lt;' rather than '<'.
            'html_entities': lambda s: s.replace('&nbsp;', ' ').replace('&lt;', '<').replace('&gt;', '>').replace('&amp;', '&'),
            # NOTE: '\s+' also matches newlines and tabs, so multi-line content
            # is flattened onto a single line by this fix.
            'duplicate_whitespace': lambda s: re.sub(r'\s+', ' ', s),
            'trailing_whitespace': lambda s: s.strip(),
        }

    def validate_and_clean_content(self, content: Union[str, Dict, List]) -> MessageValidationResult:
        """Validate and clean message content, dispatching on its type.

        Args:
            content: A string, a dict, or a list of such values.

        Returns:
            The result of the type-specific validator, or an invalid result
            with ``error_type="invalid_type"`` for any other type.
        """
        if isinstance(content, dict):
            return self._validate_dict_content(content)
        if isinstance(content, list):
            return self._validate_list_content(content)
        if isinstance(content, str):
            return self._validate_string_content(content)
        return MessageValidationResult(
            is_valid=False,
            error_type="invalid_type",
            error_details=f"Unsupported content type: {type(content)}"
        )

    def _validate_string_content(self, content: str) -> MessageValidationResult:
        """Clean a string and check that something meaningful is left.

        Args:
            content: The raw text. Falsy values are reported as empty content;
                other non-string values are reported as an invalid type.

        Returns:
            A valid result carrying the cleaned text, or an invalid result
            whose ``error_type`` is one of "empty_content", "invalid_type",
            "empty_pattern", "meaningless_content" or "too_short".
        """
        fixes_applied = []

        # Check if content is empty
        if not content:
            return MessageValidationResult(
                is_valid=False,
                error_type="empty_content",
                error_details="Content is None or empty string"
            )

        # The dict validator passes field values through unchecked; without
        # this guard the string fixes below would raise AttributeError.
        if not isinstance(content, str):
            return MessageValidationResult(
                is_valid=False,
                error_type="invalid_type",
                error_details=f"Unsupported content type: {type(content)}"
            )

        # Apply basic fixes, recording the name of each one that changed the text
        cleaned = content
        for fix_name, fix_func in self.content_fixes.items():
            fixed = fix_func(cleaned)
            if fixed != cleaned:
                fixes_applied.append(fix_name)
            cleaned = fixed

        # Check for empty content patterns (case-insensitive: "NONE", "Null", ...)
        for pattern in self.empty_patterns:
            if re.match(pattern, cleaned, re.IGNORECASE):
                return MessageValidationResult(
                    is_valid=False,
                    error_type="empty_pattern",
                    error_details=f"Content matches empty pattern: {pattern}",
                    fixes_applied=fixes_applied
                )

        # Check for meaningless content patterns
        for pattern in self.meaningless_patterns:
            if re.match(pattern, cleaned):
                return MessageValidationResult(
                    is_valid=False,
                    error_type="meaningless_content",
                    error_details=f"Content appears meaningless: {pattern}",
                    fixes_applied=fixes_applied
                )

        # Check minimum length
        if len(cleaned.strip()) < self.min_length:
            return MessageValidationResult(
                is_valid=False,
                error_type="too_short",
                error_details=f"Content too short after cleaning: '{cleaned}'",
                fixes_applied=fixes_applied
            )

        return MessageValidationResult(
            is_valid=True,
            cleaned_content=cleaned,
            fixes_applied=fixes_applied
        )

    def _validate_dict_content(self, content: Dict) -> MessageValidationResult:
        """Validate dictionary format message content.

        The 'content' and 'value' fields, when present, are validated and
        replaced by their cleaned form in a shallow copy of the dict; the
        caller's dict is left untouched.

        Args:
            content: The message-like dictionary to validate.

        Returns:
            A valid result carrying the cleaned copy, the failing field's own
            result if 'content' or 'value' is invalid, or an invalid result
            with ``error_type="no_valid_content"`` when the dict holds no
            usable content at all.
        """
        fixes_applied = []
        cleaned = content.copy()
        validated_any = False

        # Check common message fields
        for field_name in ('content', 'value'):
            if field_name not in cleaned:
                continue
            value = cleaned[field_name]
            if isinstance(value, (dict, list)) and value:
                # Structured payloads are validated recursively.
                result = self.validate_and_clean_content(value)
            else:
                result = self._validate_string_content(value)
            if not result.is_valid:
                return result
            cleaned[field_name] = result.cleaned_content
            fixes_applied.extend(result.fixes_applied or [])
            validated_any = True

        # Check if there is any valid content: either a field validated above
        # or at least one non-blank string value anywhere in the dict.
        has_valid_content = validated_any or any(
            isinstance(value, str) and value.strip() for value in cleaned.values()
        )

        if not has_valid_content:
            return MessageValidationResult(
                is_valid=False,
                error_type="no_valid_content",
                error_details="Dictionary contains no valid string content"
            )

        return MessageValidationResult(
            is_valid=True,
            cleaned_content=cleaned,
            fixes_applied=fixes_applied
        )

    def _validate_list_content(self, content: List) -> MessageValidationResult:
        """Validate list format message content.

        Each item is validated independently; invalid items are logged and
        dropped rather than failing the whole list.

        Args:
            content: The list of items (strings, dicts or nested lists).

        Returns:
            A valid result carrying the cleaned surviving items, with fix names
            prefixed by ``item_<index>_``, or an invalid result with
            ``error_type`` "empty_list" or "no_valid_items".
        """
        if not content:
            return MessageValidationResult(
                is_valid=False,
                error_type="empty_list",
                error_details="Message list is empty"
            )

        fixes_applied = []
        cleaned_list = []

        for i, item in enumerate(content):
            result = self.validate_and_clean_content(item)
            if not result.is_valid:
                # Log issues but continue processing other items
                self.logger.warning("Invalid item at index %s: %s", i, result.error_details)
                continue
            cleaned_list.append(result.cleaned_content)
            fixes_applied.extend(f"item_{i}_{fix}" for fix in result.fixes_applied or [])

        if not cleaned_list:
            return MessageValidationResult(
                is_valid=False,
                error_type="no_valid_items",
                error_details="No valid items found in list after validation"
            )

        return MessageValidationResult(
            is_valid=True,
            cleaned_content=cleaned_list,
            fixes_applied=fixes_applied
        )

    def validate_messages_before_api_call(self, messages: List[Dict]) -> MessageValidationResult:
        """Validate entire message list before API call.

        Messages that are not dicts, lack a 'role' or 'content' key, or whose
        content fails validation are logged and omitted from the output.

        Args:
            messages: The chat messages to check.

        Returns:
            A valid result carrying shallow copies of the surviving messages
            with cleaned content and fix names prefixed by ``msg_<index>_``,
            or an invalid result with ``error_type`` "empty_messages_list" or
            "no_valid_messages".
        """
        if not messages:
            return MessageValidationResult(
                is_valid=False,
                error_type="empty_messages_list",
                error_details="Messages list is empty - this will cause API error"
            )

        all_fixes = []
        cleaned_messages = []

        for i, message in enumerate(messages):
            # Validate message structure
            if not isinstance(message, dict):
                self.logger.error("Message %s is not a dict: %s", i, type(message))
                continue

            # Validate required fields
            if 'role' not in message:
                self.logger.error("Message %s missing 'role' field", i)
                continue

            if 'content' not in message:
                self.logger.error("Message %s missing 'content' field", i)
                continue

            # Validate content
            result = self.validate_and_clean_content(message['content'])
            if not result.is_valid:
                self.logger.warning("Message %s failed validation: %s", i, result.error_details)
                continue

            # Copy so the caller's message keeps its original content.
            cleaned_message = message.copy()
            cleaned_message['content'] = result.cleaned_content
            cleaned_messages.append(cleaned_message)
            all_fixes.extend(f"msg_{i}_{fix}" for fix in result.fixes_applied or [])

        if not cleaned_messages:
            return MessageValidationResult(
                is_valid=False,
                error_type="no_valid_messages",
                error_details="No valid messages remain after validation"
            )

        return MessageValidationResult(
            is_valid=True,
            cleaned_content=cleaned_messages,
            fixes_applied=all_fixes
        )

    def create_fallback_message(self, context: str = "task") -> Dict:
        """Create fallback message to prevent complete failure.

        Args:
            context: Word or phrase inserted into the request text.

        Returns:
            A dict with role "user" and a generic request for help.
        """
        return {
            "role": "user",
            "content": f"Please help me with this {context}. If you need clarification, please ask specific questions."
        }

# Module-level validator instance, constructed once when this module is imported
message_validator = UniversalMessageValidator()