#!/usr/bin/env python3
"""
HTML Syntax Checker using HTMLHint
This script validates HTML files against a comprehensive set of rules for clean, accessible HTML.
"""

import os
import sys
import json
import subprocess
import tempfile
from pathlib import Path
from typing import Dict, List, Optional, Any


class HTMLSyntaxChecker:
    """
    A Python wrapper for HTMLHint to validate HTML files against specified rules.

    The checker shells out to the ``htmlhint`` command-line tool, handing it
    the rule set stored in ``self.rules`` through a temporary JSON config
    file, and returns plain dictionaries describing the outcome. Every result
    dictionary carries a ``'success'`` flag; failures add an ``'error'``
    message, successes add ``'issues'`` (a flat list of HTMLHint messages).
    """

    def __init__(self):
        """Initialize the HTML syntax checker with predefined rules."""
        # NOTE(review): some ids below (e.g. "lang-require",
        # "meta-charset-utf-8", "meta-viewport", "no-inline-style",
        # "no-inline-script", "csslint") may not correspond to rules shipped
        # with current HTMLHint releases -- verify against the HTMLHint rule
        # list before relying on them.
        # NOTE(review): "tag-self-close" is given a list here; confirm that
        # HTMLHint honours a list value rather than treating it as a boolean.
        self.rules = {
            "doctype-html5": True,                       # Enforce HTML5 doctype declaration
            "tagname-lowercase": True,                   # Enforce lowercase tag names
            "attr-lowercase": True,                      # Enforce lowercase attribute names
            "attr-value-double-quotes": True,            # Enforce double quotes for attribute values
            "tag-pair": True,                            # Enforce all tags must have a corresponding closing tag
            "tag-self-close": ["br", "img", "input", "link", "meta"], # Allow self-closing tags for specific elements
            "id-unique": True,                           # Ensure 'id' attribute is unique in the document
            "alt-require": True,                         # Enforce 'alt' attribute for all <img> tags for accessibility
            "head-script-disabled": False,               # Allow <script> tags in the <head> section
            "style-disabled": False,                     # Allow inline CSS styles within HTML
            "no-inline-style": False,                    # Allow inline styles within HTML
            "no-inline-script": False,                   # Allow inline JavaScript
            "lang-require": True,                        # Enforce 'lang' attribute in the <html> tag for accessibility
            "meta-charset-utf-8": True,                  # Ensure UTF-8 charset declaration
            "meta-viewport": True,                       # Enforce viewport meta tag for responsive design
            "title-require": True,                       # Enforce the inclusion of the <title> tag
            "csslint": {
                "important": False,                      # Allow the use of !important in CSS
                "order-alphabetical": False              # Do not enforce alphabetical order for CSS properties
            },
            "script-disabled": False,                    # Allow JavaScript
        }
        # Set once HTMLHint has been found (or installed) so that validating
        # many files does not spawn ``htmlhint --version`` for each of them.
        self._htmlhint_ready = False

    def check_htmlhint_installation(self) -> bool:
        """
        Check if HTMLHint is installed and available in the system.

        Returns:
            bool: True if ``htmlhint --version`` exits with status 0,
            False if it fails, times out, or cannot be launched.
        """
        try:
            result = subprocess.run(['htmlhint', '--version'],
                                    capture_output=True, text=True, timeout=10)
        except (subprocess.TimeoutExpired, OSError):
            # OSError covers a missing executable as well as e.g. a
            # non-executable file shadowing the command.
            return False
        return result.returncode == 0

    def install_htmlhint(self) -> bool:
        """
        Attempt to install HTMLHint using npm.

        Runs ``npm install -g htmlhint`` and prints progress to stdout.

        Returns:
            bool: True if installation was successful, False otherwise
        """
        try:
            print("HTMLHint not found. Attempting to install via npm...")
            result = subprocess.run(['npm', 'install', '-g', 'htmlhint'],
                                    capture_output=True, text=True, timeout=60)
        except (subprocess.TimeoutExpired, OSError) as e:
            print(f"Error installing HTMLHint: {e}")
            print("Please install Node.js and npm first, then run: npm install -g htmlhint")
            return False

        if result.returncode == 0:
            print("HTMLHint installed successfully!")
            return True

        print(f"Failed to install HTMLHint: {result.stderr}")
        return False

    def _ensure_htmlhint(self) -> bool:
        """Return True once HTMLHint is usable, attempting an install if needed."""
        if getattr(self, '_htmlhint_ready', False):
            return True
        ready = self.check_htmlhint_installation() or self.install_htmlhint()
        # Only a positive result is remembered; a failure is retried next time.
        self._htmlhint_ready = ready
        return ready

    def create_config_file(self) -> str:
        """
        Create a temporary HTMLHint configuration file with the specified rules.

        The caller is responsible for deleting the file.

        Returns:
            str: Path to the temporary configuration file
        """
        # Serialize first so a non-serializable rule cannot leave a stray file.
        config_content = json.dumps(self.rules, indent=2)

        with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False,
                                         encoding='utf-8') as temp_config:
            temp_config.write(config_content)

        return temp_config.name

    @staticmethod
    def _flatten_issues(parsed: Any) -> List[Any]:
        """
        Normalize parsed HTMLHint output into a flat list of messages.

        Entries shaped like ``{"file": ..., "messages": [...]}`` are expanded
        into their individual messages; anything else is kept as-is, so an
        already flat list passes through unchanged.
        """
        if parsed is None:
            return []
        entries = parsed if isinstance(parsed, list) else [parsed]
        flat: List[Any] = []
        for entry in entries:
            messages = entry.get('messages') if isinstance(entry, dict) else None
            if isinstance(messages, list):
                flat.extend(messages)
            else:
                flat.append(entry)
        return flat

    def validate_html_file(self, file_path: str) -> Dict[str, Any]:
        """
        Validate a single HTML file using HTMLHint.

        Args:
            file_path (str): Path to the HTML file to validate

        Returns:
            Dict[str, Any]: Validation results. On success: ``'file_path'``,
            ``'issues'`` (flat list of messages) and ``'total_issues'``; if
            the tool's output is not valid JSON, ``'raw_output'`` and
            ``'stderr'`` are returned instead of ``'total_issues'``. On
            failure: ``'error'`` and an empty ``'issues'`` list.
        """
        if not os.path.exists(file_path):
            return {
                'success': False,
                'error': f'File not found: {file_path}',
                'issues': []
            }

        # Ensure HTMLHint is available
        if not self._ensure_htmlhint():
            return {
                'success': False,
                'error': 'HTMLHint is not available and could not be installed',
                'issues': []
            }

        # Create temporary config file
        config_file = self.create_config_file()

        try:
            # Run HTMLHint with the configuration
            cmd = ['htmlhint', '--config', config_file, '--format', 'json', file_path]
            result = subprocess.run(cmd, capture_output=True, text=True,
                                    encoding='utf-8', errors='replace', timeout=30)

            if not result.stdout.strip():
                if result.returncode != 0:
                    # Nothing on stdout plus a failing exit status means the
                    # tool itself failed; do not report the file as clean.
                    detail = result.stderr.strip() or f'exit code {result.returncode}'
                    return {
                        'success': False,
                        'error': f'HTMLHint failed: {detail}',
                        'issues': []
                    }
                # No issues found or empty output
                return {
                    'success': True,
                    'file_path': file_path,
                    'issues': [],
                    'total_issues': 0
                }

            # Parse the JSON output
            try:
                parsed = json.loads(result.stdout)
            except json.JSONDecodeError:
                # If JSON parsing fails, return raw output
                return {
                    'success': True,
                    'file_path': file_path,
                    'raw_output': result.stdout,
                    'stderr': result.stderr,
                    'issues': []
                }

            issues = self._flatten_issues(parsed)
            return {
                'success': True,
                'file_path': file_path,
                'issues': issues,
                'total_issues': len(issues)
            }

        except subprocess.TimeoutExpired:
            return {
                'success': False,
                'error': 'HTMLHint execution timed out',
                'issues': []
            }
        except Exception as e:
            return {
                'success': False,
                'error': f'Error running HTMLHint: {str(e)}',
                'issues': []
            }
        finally:
            # Clean up temporary config file (best effort)
            try:
                os.unlink(config_file)
            except OSError:
                pass

    def validate_html_content(self, html_content: str) -> Dict[str, Any]:
        """
        Validate HTML content (string) using HTMLHint.

        The content is written to a temporary UTF-8 ``.html`` file, validated
        with ``validate_html_file`` and the temporary file is removed again.

        Args:
            html_content (str): HTML content as a string

        Returns:
            Dict[str, Any]: Result of ``validate_html_file`` with an extra
            ``'content_validation': True`` entry
        """
        # Create temporary HTML file
        temp_html = tempfile.NamedTemporaryFile(mode='w', suffix='.html', delete=False,
                                                encoding='utf-8')
        try:
            # Writing happens inside the try so a failed write still cleans up.
            with temp_html:
                temp_html.write(html_content)

            result = self.validate_html_file(temp_html.name)
            result['content_validation'] = True
            return result
        finally:
            # Clean up temporary HTML file (best effort)
            try:
                os.unlink(temp_html.name)
            except OSError:
                pass

    @staticmethod
    def _format_issue_section(heading: str, entries: List[Dict[str, Any]]) -> List[str]:
        """Render one report section (heading plus numbered issue entries)."""
        lines = [heading]
        for number, entry in enumerate(entries, 1):
            rule = entry.get('rule')
            if isinstance(rule, dict):
                rule_id = rule.get('id', 'unknown')
            else:
                # Tolerate a missing, null or plain-string rule field.
                rule_id = rule if rule else 'unknown'

            lines.append(f"  {number}. Line {entry.get('line', 'N/A')}, Column {entry.get('col', 'N/A')}")
            lines.append(f"     Rule: {rule_id}")
            lines.append(f"     Message: {entry.get('message', str(entry))}")
            lines.append("")
        return lines

    def format_validation_report(self, validation_result: Dict[str, Any]) -> str:
        """
        Format the validation results into a readable report.

        Accepts ``'issues'`` either as a flat list of messages or in the
        per-file shape with nested ``'messages'`` lists.

        Args:
            validation_result (Dict[str, Any]): Results from validate_html_file or validate_html_content

        Returns:
            str: Formatted validation report
        """
        if not validation_result['success']:
            return f"❌ Validation failed: {validation_result.get('error', 'Unknown error')}"

        if validation_result.get('content_validation'):
            # The recorded path is a temporary file that no longer exists.
            file_path = 'HTML Content'
        else:
            file_path = validation_result.get('file_path', 'HTML Content')

        issues = self._flatten_issues(validation_result.get('issues', []))
        total_issues = validation_result.get('total_issues', len(issues))

        if 'raw_output' in validation_result and not issues:
            # Unparseable tool output must not be presented as a clean result.
            return (f"⚠️ {file_path}: HTMLHint output could not be parsed as JSON.\n"
                    f"Raw output:\n{validation_result['raw_output']}")

        if total_issues == 0:
            return f"✅ {file_path}: No issues found! HTML is valid."

        report_lines = [
            f"📋 Validation Report for: {file_path}",
            f"📊 Total Issues Found: {total_issues}",
            "=" * 60
        ]

        # Group issues by type; anything that is not a warning counts as an error
        errors = []
        warnings = []

        for issue in issues:
            if not isinstance(issue, dict):
                # Handle non-dict issues (raw text)
                errors.append({'message': str(issue), 'line': 'N/A', 'col': 'N/A'})
                continue

            issue_type = str(issue.get('type') or 'error').lower()
            if issue_type == 'warning':
                warnings.append(issue)
            else:
                errors.append(issue)

        if errors:
            report_lines.extend(
                self._format_issue_section(f"\n🚨 ERRORS ({len(errors)}):", errors))

        if warnings:
            report_lines.extend(
                self._format_issue_section(f"\n⚠️  WARNINGS ({len(warnings)}):", warnings))

        return "\n".join(report_lines)

    def validate_directory(self, directory_path: str) -> Dict[str, Any]:
        """
        Validate all HTML files in a directory.

        Walks the directory recursively, validates every ``.html`` / ``.htm``
        file (case-insensitive) and prints progress to stdout.

        Args:
            directory_path (str): Path to the directory containing HTML files

        Returns:
            Dict[str, Any]: Combined validation results for all files;
            ``'results'`` maps each file path to its individual result and
            ``'total_issues'`` sums the issues of successfully checked files
        """
        if not os.path.isdir(directory_path):
            return {
                'success': False,
                'error': f'Directory not found: {directory_path}',
                'results': {}
            }

        # Sorted so that output order is stable across runs and platforms.
        html_files = sorted(
            os.path.join(root, name)
            for root, _dirs, names in os.walk(directory_path)
            for name in names
            if name.lower().endswith(('.html', '.htm'))
        )

        if not html_files:
            return {
                'success': True,
                'message': f'No HTML files found in {directory_path}',
                'results': {}
            }

        results = {}
        total_issues = 0

        print(f"🔍 Found {len(html_files)} HTML file(s) to validate...")

        for html_file in html_files:
            print(f"Validating: {html_file}")
            result = self.validate_html_file(html_file)
            results[html_file] = result

            if result['success']:
                total_issues += result.get('total_issues', 0)

        return {
            'success': True,
            'directory': directory_path,
            'total_files': len(html_files),
            'total_issues': total_issues,
            'results': results
        }


def main():
    """
    Command-line entry point for the HTML syntax checker.

    With a path argument, validates that file, or every HTML file below that
    directory, and prints the resulting report(s). Without arguments, runs
    the checker on a built-in sample containing deliberate mistakes. Exits
    with status 1 when the argument is neither a file nor a directory.
    """
    checker = HTMLSyntaxChecker()
    cli_args = sys.argv[1:]

    if not cli_args:
        # No target given: demonstrate with sample HTML content
        print("🧪 Testing with sample HTML content...")

        # Sample HTML with intentional issues for demonstration
        sample_html = """
        <HTML>
        <head>
            <title>Test Page</title>
        </head>
        <body>
            <h1 id="header">Welcome</h1>
            <img src="test.jpg">
            <p id="header">Duplicate ID issue</p>
            <DIV>Mixed case tags</DIV>
            <input type='text' name='test'>
        </body>
        </HTML>
        """

        demo_result = checker.validate_html_content(sample_html)
        print(checker.format_validation_report(demo_result))
        return

    # Only the first command line argument is used as the target
    target = cli_args[0]

    if os.path.isfile(target):
        print(f"🔍 Validating HTML file: {target}")
        file_result = checker.validate_html_file(target)
        print(checker.format_validation_report(file_result))
        return

    if not os.path.isdir(target):
        print(f"❌ Invalid target: {target}")
        sys.exit(1)

    print(f"🔍 Validating HTML files in directory: {target}")
    summary = checker.validate_directory(target)
    per_file = summary.get('results')

    if not (summary['success'] and per_file):
        # Nothing to report per file: show the message or error instead
        print(summary.get('message', summary.get('error', 'No results')))
        return

    print(f"\n📊 Summary: {summary['total_files']} files, {summary['total_issues']} total issues")
    print("=" * 60)

    for single_result in per_file.values():
        print(checker.format_validation_report(single_result))
        print("-" * 40)
    


# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()
