"""
Terminal Log Storage Manager with Hash-Based Deduplication

This module manages terminal log storage using SHA-256 hashes for deduplication,
allowing multiple evaluations with identical outputs to share the same log file.
"""

import hashlib
import json
import logging
import os
from pathlib import Path
from typing import Any, Dict, Optional

from django.conf import settings
from django.utils import timezone

# Module-level logger named after this module; TerminalLogManager reports all
# of its diagnostics through it.
logger = logging.getLogger(__name__)


class TerminalLogManager:
    """
    Manages terminal log storage with hash-based deduplication.

    Each log is written to ``<log_directory>/<sha256>.log``, where the hash is
    computed over the UTF-8 encoding of the content, so identical outputs map
    to the same file. Optional per-log metadata is kept as JSON in
    ``<log_directory>/metadata/<sha256>.json``.

    Key Features:
    - SHA-256 hash-based file naming for automatic deduplication
    - Configurable log storage directory
    - Atomic file operations for consistency
    - Automatic directory creation
    - Log metadata tracking
    """

    # Files are hashed in fixed-size chunks so a large log never has to be
    # held in memory all at once.
    _HASH_CHUNK_SIZE = 1024 * 1024

    def __init__(self, log_directory: Optional[str] = None):
        """
        Initialize the terminal log manager.

        Creates the log directory and its ``metadata`` subdirectory if they
        do not exist yet.

        Args:
            log_directory: Directory to store log files. When omitted (or
                empty), ``settings.TERMINAL_LOG_DIRECTORY`` is used, falling
                back to a ``terminal_logs`` directory three levels above this
                file.
        """
        default_log_dir = Path(__file__).parent.parent.parent / 'terminal_logs'
        self.log_directory = Path(
            log_directory or
            getattr(settings, 'TERMINAL_LOG_DIRECTORY', str(default_log_dir))
        )

        # Ensure directory exists
        self.log_directory.mkdir(parents=True, exist_ok=True)

        # Metadata lives in its own subdirectory so '*.log' globs stay clean.
        self.metadata_directory = self.log_directory / 'metadata'
        self.metadata_directory.mkdir(exist_ok=True)

        logger.info(f"Terminal log manager initialized: {self.log_directory}")

    def store_terminal_log(self, terminal_output: str, metadata: Optional[Dict[str, Any]] = None) -> str:
        """
        Store terminal log content and return its hash.

        If a log with the same content already exists, nothing is rewritten;
        any supplied metadata is merged into the existing metadata instead.

        Args:
            terminal_output: Terminal output content to store
            metadata: Optional metadata about the log (evaluation info, timestamps, etc.)

        Returns:
            SHA-256 hash of the content (used as filename and database reference)

        Raises:
            Exception: Any error raised while hashing or writing the log file
                is logged and re-raised. Metadata failures are only logged.
        """
        try:
            content_hash = self._calculate_hash(terminal_output)
            log_file_path = self.log_directory / f"{content_hash}.log"

            # Check if file already exists (deduplication)
            if log_file_path.exists():
                logger.debug(f"Log file already exists: {content_hash}.log")

                if metadata:
                    self._update_metadata(content_hash, metadata)

                return content_hash

            self._atomic_write_text(log_file_path, terminal_output)

            if metadata:
                self._store_metadata(content_hash, metadata)

            logger.info(f"Stored terminal log: {content_hash}.log ({len(terminal_output)} chars)")
            return content_hash

        except Exception as e:
            logger.error(f"Error storing terminal log: {e}")
            raise

    def retrieve_terminal_log(self, log_hash: str) -> Optional[str]:
        """
        Retrieve terminal log content by hash.

        Args:
            log_hash: SHA-256 hash of the log content

        Returns:
            Log content or None if not found (or if it could not be read)
        """
        try:
            log_file_path = self.log_directory / f"{log_hash}.log"

            if not log_file_path.exists():
                logger.warning(f"Log file not found: {log_hash}.log")
                return None

            # newline='' disables universal-newline translation. Without it
            # every '\r' and '\r\n' in the stored output would come back as
            # '\n', so the returned text would no longer match its own hash.
            # NOTE: files written by earlier versions of this code on Windows
            # had '\n' expanded to '\r\n' on disk and are returned as stored.
            with open(log_file_path, 'r', encoding='utf-8', newline='') as f:
                content = f.read()

            logger.debug(f"Retrieved terminal log: {log_hash}.log")
            return content

        except Exception as e:
            logger.error(f"Error retrieving terminal log {log_hash}: {e}")
            return None

    def get_log_metadata(self, log_hash: str) -> Optional[Dict[str, Any]]:
        """
        Get metadata for a log file.

        Args:
            log_hash: SHA-256 hash of the log content

        Returns:
            Metadata dictionary or None if not found (or if it could not be
            read or parsed)
        """
        try:
            metadata_file = self.metadata_directory / f"{log_hash}.json"

            if not metadata_file.exists():
                return None

            with open(metadata_file, 'r', encoding='utf-8') as f:
                return json.load(f)

        except Exception as e:
            logger.error(f"Error retrieving metadata for {log_hash}: {e}")
            return None

    def _calculate_hash(self, content: str) -> str:
        """
        Calculate SHA-256 hash of content.

        Args:
            content: Content to hash (encoded as UTF-8 before hashing)

        Returns:
            Hexadecimal hash string
        """
        return hashlib.sha256(content.encode('utf-8')).hexdigest()

    def _hash_file(self, file_path: Path) -> str:
        """
        Calculate the SHA-256 hash of a file's raw bytes.

        Args:
            file_path: File to hash

        Returns:
            Hexadecimal hash string
        """
        digest = hashlib.sha256()
        with open(file_path, 'rb') as f:
            for chunk in iter(lambda: f.read(self._HASH_CHUNK_SIZE), b''):
                digest.update(chunk)
        return digest.hexdigest()

    def _atomic_write_text(self, destination: Path, text: str) -> None:
        """
        Write text to a file so readers never observe a partial file.

        The text is written to a uniquely named temporary file in the same
        directory and then moved over the destination.

        Args:
            destination: Final path of the file
            text: Content to write (encoded as UTF-8, newlines untouched)

        Raises:
            Exception: Any write/rename error, after the temporary file has
                been removed.
        """
        # A per-writer unique name keeps concurrent writers of the same
        # destination from truncating or deleting each other's temp file.
        # The '.tmp' suffix keeps it out of the '*.log' / '*.json' globs.
        temp_path = destination.with_name(
            f"{destination.name}.{os.getpid()}.{os.urandom(4).hex()}.tmp"
        )

        try:
            # newline='' writes the text byte-for-byte; the default would
            # turn '\n' into '\r\n' on Windows and break the content hash.
            with open(temp_path, 'w', encoding='utf-8', newline='') as f:
                f.write(text)

            # os.replace overwrites an existing destination on every
            # platform, whereas Path.rename raises on Windows in that case.
            os.replace(temp_path, destination)

        except BaseException:
            # Cleanup temp file on error
            try:
                temp_path.unlink()
            except FileNotFoundError:
                pass
            raise

    def _write_metadata(self, log_hash: str, record: Dict[str, Any]) -> None:
        """
        Serialize a metadata record to JSON and write it atomically.

        Args:
            log_hash: SHA-256 hash of the log content
            record: Complete metadata record to persist

        Raises:
            Exception: Any serialization or write error.
        """
        metadata_file = self.metadata_directory / f"{log_hash}.json"
        # default=str stringifies values json cannot encode natively.
        serialized = json.dumps(record, indent=2, default=str)
        self._atomic_write_text(metadata_file, serialized)

    def _store_metadata(self, log_hash: str, metadata: Dict[str, Any]) -> None:
        """
        Store metadata for a log file.

        Best-effort: failures are logged and not raised, so a metadata
        problem never fails the storing of the log itself.

        Args:
            log_hash: SHA-256 hash of the log content
            metadata: Metadata to store (not modified)
        """
        try:
            # Work on a copy so the caller's dict is left untouched.
            record = dict(metadata)
            record.update({
                'log_hash': log_hash,
                'stored_at': timezone.now().isoformat(),
                'file_path': f"{log_hash}.log"
            })
            self._write_metadata(log_hash, record)

        except Exception as e:
            logger.error(f"Error storing metadata for {log_hash}: {e}")

    def _update_metadata(self, log_hash: str, new_metadata: Dict[str, Any]) -> None:
        """
        Update existing metadata for a log file.

        New keys are merged over the existing record and ``updated_at`` is
        refreshed. The original ``stored_at`` timestamp is preserved when the
        existing record has one. Best-effort: failures are logged, not raised.

        Args:
            log_hash: SHA-256 hash of the log content
            new_metadata: New metadata to merge
        """
        try:
            existing_metadata = self.get_log_metadata(log_hash) or {}
            # Remember when the log was first stored before merging, so a
            # duplicate store does not make the log look newly created.
            first_stored_at = existing_metadata.get('stored_at')
            now = timezone.now().isoformat()

            merged = {**existing_metadata, **new_metadata}
            merged.update({
                'log_hash': log_hash,
                'stored_at': first_stored_at or now,
                'file_path': f"{log_hash}.log",
                'updated_at': now
            })
            self._write_metadata(log_hash, merged)

        except Exception as e:
            logger.error(f"Error updating metadata for {log_hash}: {e}")

    def get_storage_stats(self) -> Dict[str, Any]:
        """
        Get storage statistics for the log directory.

        Returns:
            Dictionary with storage statistics, or ``{'error': <message>}``
            if they could not be collected. ``deduplication_savings`` is the
            number of log files whose bytes duplicate another log file; since
            files are named after their content hash this is 0 unless files
            were added or altered outside this class. Computing it reads
            every log file.
        """
        try:
            log_files = list(self.log_directory.glob('*.log'))
            metadata_files = list(self.metadata_directory.glob('*.json'))

            total_size = sum(f.stat().st_size for f in log_files)

            # Hash raw bytes in chunks: no decoding errors, no newline
            # translation, and no whole-file strings held in memory.
            distinct_contents = {self._hash_file(f) for f in log_files}

            return {
                'log_files_count': len(log_files),
                'metadata_files_count': len(metadata_files),
                'total_size_bytes': total_size,
                'total_size_mb': round(total_size / (1024 * 1024), 2),
                'directory': str(self.log_directory),
                'deduplication_savings': len(log_files) - len(distinct_contents)
            }

        except Exception as e:
            logger.error(f"Error getting storage stats: {e}")
            return {'error': str(e)}

    def cleanup_orphaned_metadata(self) -> int:
        """
        Clean up metadata files that don't have corresponding log files.

        A failure on one file is logged and skipped so the remaining files
        are still processed.

        Returns:
            Number of orphaned metadata files removed
        """
        try:
            metadata_files = list(self.metadata_directory.glob('*.json'))
        except Exception as e:
            logger.error(f"Error cleaning up orphaned metadata: {e}")
            return 0

        removed_count = 0
        for metadata_file in metadata_files:
            # The metadata file's stem is the hash of the log it describes.
            log_file = self.log_directory / f"{metadata_file.stem}.log"

            try:
                if log_file.exists():
                    continue
                metadata_file.unlink()
            except FileNotFoundError:
                # Already removed by someone else; nothing left to do.
                continue
            except OSError as e:
                logger.error(f"Error cleaning up orphaned metadata: {e}")
                continue

            removed_count += 1
            logger.info(f"Removed orphaned metadata: {metadata_file.name}")

        return removed_count

    def verify_log_integrity(self, log_hash: str) -> bool:
        """
        Verify that a log file's content matches its hash.

        The file's raw bytes are hashed directly, so the check is exact and
        does not load the whole log into memory.

        Args:
            log_hash: Expected SHA-256 hash

        Returns:
            True if integrity check passes, False otherwise (including when
            the log file is missing or unreadable)
        """
        try:
            log_file_path = self.log_directory / f"{log_hash}.log"

            if not log_file_path.exists():
                logger.warning(f"Log file not found: {log_hash}.log")
                return False

            return self._hash_file(log_file_path) == log_hash

        except Exception as e:
            logger.error(f"Error verifying log integrity for {log_hash}: {e}")
            return False


# Global terminal log manager instance shared by importers of this module.
# NOTE: it is constructed at import time with no explicit directory, so
# importing this module reads settings.TERMINAL_LOG_DIRECTORY (falling back to
# the default 'terminal_logs' directory) and creates the log and metadata
# directories if they are missing.
terminal_log_manager = TerminalLogManager()
