"""
File Access Control for Multi-Agent System

Controls which files and directories each agent can access.
This implements the "blind builder" pattern and other access restrictions.

Uses Claude Agent SDK's PreToolUse hooks for enforcement.

Key design:
- Separate READ and WRITE permissions for fine-grained control
- Deny rules take precedence over allow rules
- Each agent has specific scope to prevent overreach
"""

import fnmatch
import os
import re
from pathlib import Path
from typing import Any, Dict, List, Set

# Import SDK types for hooks
from claude_agent_sdk.types import (
    HookMatcher,
    PreToolUseHookInput,
    HookContext,
)

from .tool_controller import AgentType


class FileAccessController:
    """
    Controls file and directory access permissions for each agent.

    Key features:
    - Builder agent cannot see tests/ or solution.sh (blind testing)
    - .logs/ is off-limits to every agent
    - Read and write allow lists are evaluated separately
    - Deny rules always take precedence over allow rules

    All paths are interpreted relative to the CVE working directory and are
    normalized lexically ('.' and '..' segments collapsed, no filesystem
    access) before being matched against the fnmatch-style rule patterns.
    """

    # Simplified file access rules
    # All agents can read/write all files in the working directory, except:
    #   - .logs/*              : denied to every agent
    #   - tests/*, solution.sh : additionally denied to the Builder
    #
    # Further restrictions are applied by the PreToolUse hook built in
    # create_file_access_hooks():
    # 1. Working directory boundary (cannot access files outside CVE working dir)
    # 2. Dangerous command filtering (delegated to CommandFilter)
    # 3. System path protection (cannot write to /tmp, /etc, /home, etc.)

    ACCESS_RULES: Dict[AgentType, Dict[str, List[str]]] = {
        AgentType.ANALYZER: {
            'read_allowed': ['*'],
            'write_allowed': ['*'],
            'denied': ['.logs/*'],
        },

        AgentType.GENERATOR: {
            'read_allowed': ['*'],
            'write_allowed': ['*'],
            'denied': ['.logs/*'],
        },

        AgentType.BUILDER: {
            'read_allowed': ['*'],
            'write_allowed': ['*'],
            'denied': ['.logs/*', 'tests/*', 'solution.sh'],
        },

        AgentType.VALIDATOR: {
            'read_allowed': ['*'],
            'write_allowed': ['*'],
            'denied': ['.logs/*'],
        },

        AgentType.SOLVER: {
            'read_allowed': ['*'],
            'write_allowed': ['*'],
            'denied': ['.logs/*'],
        },

        AgentType.CHECKER: {
            'read_allowed': ['*'],
            'write_allowed': ['*'],
            'denied': ['.logs/*'],
        },
    }

    @staticmethod
    def _normalize(file_path: str) -> str:
        """
        Return file_path with duplicate separators, '.' and '..' collapsed.

        The normalization is purely lexical (symlinks are not followed), so
        'src/../tests/t.py' is matched as 'tests/t.py' instead of slipping
        past a 'tests/*' deny rule.
        """
        return os.path.normpath(str(file_path))

    @staticmethod
    def _is_denied(norm_path: str, denied_patterns: List[str]) -> bool:
        """
        Tell whether a normalized path is covered by any deny pattern.

        A pattern of the form 'dir/*' also covers the entry 'dir' itself;
        otherwise the directory could still be handed to a listing/search
        tool even though everything inside it is denied.
        """
        for pattern in denied_patterns:
            if fnmatch.fnmatch(norm_path, pattern):
                return True
            if pattern.endswith('/*') and fnmatch.fnmatch(norm_path, pattern[:-2]):
                return True
        return False

    @classmethod
    def _is_permitted(cls, agent_type: AgentType, file_path: str, allow_key: str) -> bool:
        """
        Shared rule evaluation behind can_read_file / can_write_file.

        Args:
            agent_type: The type of agent
            file_path: Path to file (relative to CVE working directory)
            allow_key: 'read_allowed' or 'write_allowed'

        Returns:
            True if no deny pattern matches and at least one allow pattern
            does; False otherwise (including for agents without rules).
        """
        rules = cls.ACCESS_RULES.get(agent_type, {})
        norm_path = cls._normalize(file_path)

        # Deny takes precedence over allow
        if cls._is_denied(norm_path, rules.get('denied', [])):
            return False

        # Default deny if no allow pattern matches
        return any(
            fnmatch.fnmatch(norm_path, pattern)
            for pattern in rules.get(allow_key, [])
        )

    @classmethod
    def can_read_file(cls, agent_type: AgentType, file_path: str) -> bool:
        """
        Check if an agent can READ a specific file.

        Args:
            agent_type: The type of agent
            file_path: Path to file (relative to CVE working directory)

        Returns:
            True if read is allowed, False if denied
        """
        return cls._is_permitted(agent_type, file_path, 'read_allowed')

    @classmethod
    def can_write_file(cls, agent_type: AgentType, file_path: str) -> bool:
        """
        Check if an agent can WRITE (create/modify) a specific file.

        Args:
            agent_type: The type of agent
            file_path: Path to file (relative to CVE working directory)

        Returns:
            True if write is allowed, False if denied
        """
        return cls._is_permitted(agent_type, file_path, 'write_allowed')

    @classmethod
    def can_access_file(cls, agent_type: AgentType, file_path: str) -> bool:
        """
        Check if an agent can access (read) a specific file.
        Legacy method for backward compatibility; delegates to can_read_file.

        Args:
            agent_type: The type of agent
            file_path: Path to file (relative to CVE working directory)

        Returns:
            True if access is allowed, False if denied
        """
        return cls.can_read_file(agent_type, file_path)

    @classmethod
    def get_accessible_files(cls, agent_type: AgentType, all_files: List[str]) -> List[str]:
        """
        Filter a list of files to only those readable by an agent.

        Args:
            agent_type: The type of agent
            all_files: List of all file paths

        Returns:
            Filtered list of accessible files (input order preserved)
        """
        return [
            f for f in all_files
            if cls.can_read_file(agent_type, f)
        ]

    @classmethod
    def get_writable_files(cls, agent_type: AgentType, all_files: List[str]) -> List[str]:
        """
        Filter a list of files to only those writable by an agent.

        Args:
            agent_type: The type of agent
            all_files: List of all file paths

        Returns:
            Filtered list of writable files (input order preserved)
        """
        return [
            f for f in all_files
            if cls.can_write_file(agent_type, f)
        ]

    @classmethod
    def get_hidden_directories(cls, agent_type: AgentType) -> Set[str]:
        """
        Get set of directory patterns that are hidden from an agent.

        Args:
            agent_type: The type of agent

        Returns:
            Set of directory path patterns that are hidden
        """
        rules = cls.ACCESS_RULES.get(agent_type, {})
        denied_patterns = rules.get('denied', [])

        dirs: Set[str] = set()
        for pattern in denied_patterns:
            if pattern.endswith('/*'):
                # 'dir/*' -> 'dir'
                dirs.add(pattern[:-2])
            elif '/' in pattern and not pattern.endswith('*'):
                # Nested, non-wildcard entry: report its containing directory
                dirs.add(str(Path(pattern).parent))

        return dirs

    @classmethod
    def get_visible_file_tree(cls, agent_type: AgentType, base_path: Path) -> Dict:
        """
        Generate a file tree view showing what agent can see.
        Useful for debugging and visualization.

        Args:
            agent_type: The type of agent
            base_path: Base directory path

        Returns:
            Nested dictionary of the readable entries under base_path:
            directories map to dicts, files map to the string 'FILE'.
            Empty if base_path does not exist.
        """
        tree: Dict[str, Any] = {}

        if not base_path.exists():
            return tree

        for item in base_path.rglob('*'):
            rel_path = str(item.relative_to(base_path))
            if not cls.can_read_file(agent_type, rel_path):
                continue

            *parents, leaf = Path(rel_path).parts
            node = tree
            for part in parents:
                node = node.setdefault(part, {})
            if item.is_file():
                node[leaf] = 'FILE'
            else:
                node.setdefault(leaf, {})

        return tree

def check_builder_blind_testing(working_dir: Path) -> Dict[str, bool]:
    """
    Verify that the access rules implement blind testing for the Builder.

    Probes FileAccessController with representative paths
    ('tests/test_vuln.py' and 'solution.sh') for the Builder and the
    Validator and summarizes the outcome.

    Args:
        working_dir: CVE working directory (only used to report whether a
            tests/ directory exists on disk)

    Returns:
        Dictionary with the individual probe results plus
        'blind_testing_ok', the overall verdict
    """
    test_file = 'tests/test_vuln.py'
    solution_file = 'solution.sh'
    controller = FileAccessController

    results = {
        'tests_dir_exists': (working_dir / 'tests').exists(),
        'builder_can_read_tests': controller.can_read_file(AgentType.BUILDER, test_file),
        'builder_can_write_tests': controller.can_write_file(AgentType.BUILDER, test_file),
        'builder_can_read_solution': controller.can_read_file(AgentType.BUILDER, solution_file),
        'validator_can_read_tests': controller.can_read_file(AgentType.VALIDATOR, test_file),
        'validator_can_write_tests': controller.can_write_file(AgentType.VALIDATOR, test_file),
        'validator_can_read_solution': controller.can_read_file(AgentType.VALIDATOR, solution_file),
        'validator_can_write_solution': controller.can_write_file(AgentType.VALIDATOR, solution_file),
    }

    # Builder must NOT see tests or solution; Validator must be able to
    # read/write tests and read the solution.
    #
    # 'validator_can_write_solution' stays in the report for information but
    # is not part of the verdict: ACCESS_RULES grant the Validator write access
    # to everything except .logs/*, so requiring it to be False made
    # 'blind_testing_ok' permanently False regardless of the Builder rules.
    results['blind_testing_ok'] = (
        not results['builder_can_read_tests'] and
        not results['builder_can_write_tests'] and
        not results['builder_can_read_solution'] and
        results['validator_can_read_tests'] and
        results['validator_can_write_tests'] and
        results['validator_can_read_solution']
    )

    return results


def _get_denial_reason(agent_type: AgentType, file_path: str, operation: str = "access") -> str:
    """
    Get a human-readable reason for why access was denied.

    Args:
        agent_type: The agent type
        file_path: The file path that was denied
        operation: "read" or "write"

    Returns:
        Explanation string
    """
    if file_path.startswith('.logs'):
        return "'.logs/' directory is reserved for orchestrator logs"

    return f"Cannot {operation} '{file_path}'"


def create_file_access_hooks(
    agent_type: AgentType,
    working_dir: Path
) -> Dict[str, List[HookMatcher]]:
    """
    Create PreToolUse hooks for file access control.

    This is the recommended approach since can_use_tool callback has bugs
    in the current SDK version (see GitHub issues #227, #200).

    The returned hook applies, in order:
    1. The per-agent tool allow list (ToolController.is_tool_allowed)
    2. For Bash: dangerous-command filtering (CommandFilter.is_dangerous)
       and a regex check for writes into system paths
    3. For Read/Glob/Grep/Write/Edit: the working-directory boundary, then
       the FileAccessController rules on the path relative to working_dir

    Args:
        agent_type: The type of agent (determines access rules)
        working_dir: The CVE working directory (base for relative paths)

    Returns:
        Dictionary of hooks to pass to ClaudeAgentOptions
    """
    import logging
    from .tool_controller import ToolController, CommandFilter

    logger = logging.getLogger(__name__)

    # Extract CVE-ID from working directory name
    cve_id = working_dir.name if working_dir.name.startswith('CVE-') else 'unknown'
    log_tag = f"[FILE_ACCESS_HOOK] {cve_id} | {agent_type.value}"

    # Tools categorized by operation type (tool name -> path parameter name)
    READ_TOOLS = {'Read': 'file_path', 'Glob': 'path', 'Grep': 'path'}
    WRITE_TOOLS = {'Write': 'file_path', 'Edit': 'file_path'}

    # System paths blocked for all agents
    GLOBAL_BLOCKED_PATHS = ['/tmp', '/var/tmp', '/home', '/root', '/etc', '/usr', '/var']

    docker_command_re = re.compile(r'\bdocker\s+(exec|run)\b')

    def _system_write_regex(blocked_path: str):
        """Compile one regex matching shell writes (or cd) into blocked_path."""
        escaped_path = re.escape(blocked_path)
        alternatives = (
            rf'>\s*{escaped_path}/', rf'>>\s*{escaped_path}/',
            rf'\btee\b.*{escaped_path}/', rf'\bcp\b.*\s+{escaped_path}/',
            rf'\bmv\b.*\s+{escaped_path}/', rf'\btouch\b.*{escaped_path}/',
            rf'\bmkdir\b.*{escaped_path}/', rf'\bgit\s+clone\b.*{escaped_path}/',
            # Also catch 'cd <path>' at the very end of the command or directly
            # followed by a shell separator, not only before '/' or whitespace.
            rf'\bcd\s+{escaped_path}(?:[/\s;&|]|$)',
        )
        return re.compile('|'.join(alternatives), re.IGNORECASE)

    # Compiled once per hook instead of on every Bash invocation
    system_write_checks = [
        (blocked_path, _system_write_regex(blocked_path))
        for blocked_path in GLOBAL_BLOCKED_PATHS
    ]

    # Calculate project root and allowed read paths outside working_dir
    # Structure: project_root/cve_tasks/CVE-xxx/ -> working_dir
    # We allow READ access to project_root/scripts/ for check scripts
    project_root = working_dir.parent.parent
    ALLOWED_READ_DIRS = [(project_root / 'scripts').resolve()]  # Read-only access

    def _deny_response(hook_event_name: str, reason: str) -> Dict[str, Any]:
        """Create a properly formatted deny response for PreToolUse hooks."""
        return {
            'hookSpecificOutput': {
                'hookEventName': hook_event_name,
                'permissionDecision': 'deny',
                'permissionDecisionReason': reason,
            }
        }

    def _is_within(target: Path, base: Path) -> bool:
        """True if target is base or lies below it (component-wise, so
        '/scripts_malicious' is not considered inside '/scripts')."""
        try:
            target.relative_to(base)
        except ValueError:
            return False
        return True

    def _check_bash(command: str, hook_event_name: str) -> Dict[str, Any]:
        """Apply the dangerous-command and system-path checks to a Bash command."""
        is_dangerous, reason = CommandFilter.is_dangerous(command)
        if is_dangerous:
            logger.warning(f"{log_tag} | BLOCKED: dangerous cmd - {reason}")
            return _deny_response(hook_event_name, f"Dangerous command blocked: {reason}")

        # Check for writes to system paths (except docker exec/run).
        # NOTE(review): the exemption is decided on the whole command string,
        # so a system-path write chained with a 'docker exec/run' in the same
        # command is not inspected here.
        if not docker_command_re.search(command):
            for blocked_path, write_re in system_write_checks:
                if write_re.search(command):
                    logger.warning(f"{log_tag} | BLOCKED: write to system path {blocked_path}")
                    return _deny_response(
                        hook_event_name,
                        f"Cannot write to system path '{blocked_path}'. Use CVE working directory.",
                    )

        # Allow Bash command
        return {}

    async def pre_tool_use_hook(
        input_data: PreToolUseHookInput,
        tool_use_id: str,
        context: HookContext
    ) -> Dict[str, Any]:
        """PreToolUse hook for file access control.

        Returns an empty dict to let the tool call proceed, or a deny
        response (see _deny_response) to block it.
        """
        tool_name = input_data.get('tool_name', '')
        tool_input = input_data.get('tool_input', {})
        hook_event_name = input_data.get('hook_event_name', 'PreToolUse')

        # First check: Is the tool allowed for this agent type?
        if not ToolController.is_tool_allowed(agent_type, tool_name):
            logger.warning(f"{log_tag} | DENIED: tool {tool_name} not allowed")
            return _deny_response(hook_event_name, f"Tool '{tool_name}' is not allowed for {agent_type.value}")

        if tool_name == 'Bash':
            return _check_bash(tool_input.get('command', ''), hook_event_name)

        # Determine if this is a read or write operation
        is_read = tool_name in READ_TOOLS
        is_write = tool_name in WRITE_TOOLS

        # Non-file tools are allowed
        if not is_read and not is_write:
            return {}

        path_param = READ_TOOLS.get(tool_name) or WRITE_TOOLS.get(tool_name)
        file_path = tool_input.get(path_param)

        # NOTE(review): calls without a path (e.g. Glob/Grep using only a
        # pattern) are let through without any rule check; confirm that such
        # calls cannot surface denied directories like tests/ to the Builder.
        if not file_path:
            return {}

        # Resolve the target on disk. Any failure is treated as a denial so
        # that a malformed path can never be waved through.
        try:
            base = working_dir.resolve()
            # Joining with an absolute file_path yields file_path itself, so
            # this handles absolute and relative inputs alike.
            target = (working_dir / file_path).resolve()
        except Exception as e:
            logger.warning(f"{log_tag} | ERROR: path validation - {e}")
            return _deny_response(hook_event_name, f"Invalid path: {file_path}")

        if not _is_within(target, base):
            # Outside the working directory only the read-only allow list
            # applies; no pattern checking is done for those directories.
            if is_read and any(_is_within(target, allowed) for allowed in ALLOWED_READ_DIRS):
                return {}
            logger.warning(f"{log_tag} | DENIED: path outside working dir: {file_path}")
            return _deny_response(hook_event_name, f"Cannot access files outside CVE working directory: {file_path}")

        # Always match the rules against the path derived from the RESOLVED
        # target. Using the raw tool input would let 'x/../tests/t.py' (or a
        # symlink into tests/) bypass the 'tests/*' deny rule.
        rel_path = str(target.relative_to(base))

        # Check file access permissions using relative path
        if is_read:
            allowed = FileAccessController.can_read_file(agent_type, rel_path)
            operation = "read"
        else:
            allowed = FileAccessController.can_write_file(agent_type, rel_path)
            operation = "write"

        if allowed:
            return {}

        reason = _get_denial_reason(agent_type, rel_path, operation)
        logger.warning(f"{log_tag} | DENIED: {operation} {rel_path} - {reason}")
        return _deny_response(hook_event_name, f"Access denied: {reason}")

    # Return hooks dictionary
    return {
        'PreToolUse': [
            HookMatcher(
                matcher=None,  # Match all tools
                hooks=[pre_tool_use_hook],
            )
        ]
    }


if __name__ == "__main__":
    # Manual smoke check: print a read/write permission matrix for every
    # agent type over a fixed set of sample paths.
    print("File Access Control Configuration\n" + "=" * 60)

    sample_paths = (
        'public.md',
        'task.yaml',
        'tests/test_vuln.py',
        'solution.sh',
        'Dockerfile',
        'docker-compose.yaml',
        'run-tests.sh',
        '.agent_state/analyzer_output/analysis.md',
        '.agent_state/validator_output/result.json',
        '.logs/orchestrator.log',
        '.tmp/cache.json',
    )

    # The table header is identical for every agent, so build it once
    header_row = f"  {'File':<45} {'Read':^6} {'Write':^6}"
    rule_row = f"  {'-'*45} {'-'*6} {'-'*6}"

    def _mark(permitted: bool) -> str:
        """Render a permission flag as a check mark or a cross."""
        return "✓" if permitted else "✗"

    for agent in AgentType:
        print(f"\n{agent.value.upper()}:")
        print(header_row)
        print(rule_row)
        for sample in sample_paths:
            read_status = _mark(FileAccessController.can_read_file(agent, sample))
            write_status = _mark(FileAccessController.can_write_file(agent, sample))
            print(f"  {sample:<45} {read_status:^6} {write_status:^6}")
