"""
Core Tools for Inspect Framework Evaluations

This module provides the essential tools for AI model evaluation:
- Python code execution (via Inspect framework)
- Bash console access (via Inspect framework)
- SageMath computation capabilities
"""

from inspect_ai.tool import tool, Tool, ToolResult
from inspect_ai.tool import python as inspect_python
from inspect_ai.tool import bash as inspect_bash
from inspect_ai.tool import web_search
from inspect_ai.tool._tool_description import set_tool_description, ToolDescription
from inspect_ai.tool._tool_params import ToolParams, ToolParam
from typing import Optional
import json
import re
import logging
import shlex
import os
import httpx
import asyncio

logger = logging.getLogger(__name__)


def python_with_custom_description(timeout: int | None = None, user: str | None = None, sandbox: str | None = None) -> Tool:
    """
    Build a Python execution tool whose model-facing description comes from the
    centralized prompt config, with memory limit detection.

    IMPORTANT: The returned tool bypasses the native Inspect python tool and calls
    sandbox.exec() directly with timeout_retry=False to prevent automatic retries
    on timeout.

    Args:
        timeout: Seconds allowed for one execution. When truthy, the script is also
            wrapped in GNU `timeout` inside the sandbox; None or 0 means no limit.
        user: User to run the command as (forwarded to sandbox exec).
        sandbox: Sandbox environment name (forwarded to inspect_ai.util.sandbox).

    Returns:
        An async callable taking `code` and returning the combined stderr/stdout
        text, or an explanatory message on timeout or memory failure.
    """
    import uuid

    # Import sandbox utilities
    from inspect_ai.util import sandbox as sandbox_env

    async def memory_aware_python(code: str) -> str:
        """Execute Python code with memory limit detection and no timeout retries."""
        # A random name keeps concurrent executions (even of identical code)
        # from sharing, and then deleting, each other's script file.
        script_name = f"/tmp/python_script_{uuid.uuid4().hex}.py"

        # The code is written via a quoted here-document to avoid shell escaping
        # issues. The delimiter must not occur in the code itself, otherwise the
        # here-document would end early and the remainder would run as shell.
        delimiter = "PYTHON_EOF"
        while delimiter in code:
            delimiter = f"PYTHON_EOF_{uuid.uuid4().hex}"

        run_script = f"python3 {script_name}"
        if timeout:
            # GNU timeout sends SIGTERM at the deadline and SIGKILL 5 seconds later
            run_script = f"timeout --preserve-status --kill-after=5 {timeout} {run_script}"

        exec_command = f'''
cat > {script_name} << '{delimiter}'
{code}
{delimiter}
{run_script}
exit_code=$?
rm -f {script_name}
exit $exit_code
'''

        try:
            # Directly call sandbox exec with timeout_retry=False to prevent retries
            result = await sandbox_env(sandbox).exec(
                cmd=["bash", "--login", "-c", exec_command],
                timeout=timeout if timeout else None,
                user=user,
                timeout_retry=False,  # CRITICAL: Disable automatic retries on timeout
            )
        except TimeoutError:
            # Even with timeout_retry=False, we still get TimeoutError on the first timeout
            logger.warning("Python code timed out")
            return "Python execution timed out before completing."
        except Exception as e:
            # A timeout may arrive wrapped in another exception type
            error_msg = str(e).lower()
            if "timeout" in error_msg or "timed out" in error_msg:
                logger.warning("Python code timed out (wrapped)")
                return "Python execution timed out before completing."
            # Anything else is a genuine failure for the caller to see
            raise

        # Build output string (stderr first, then stdout)
        output = ""
        if result.stderr:
            output = f"{result.stderr}\n"
        output = f"{output}{result.stdout}"

        # Memory-related error text in the output
        if "MemoryError" in output or "Cannot allocate memory" in output:
            logger.warning("Python tool execution failed with MemoryError")
            return (
                "ERROR: Memory allocation failed. The python execution failed due to insufficient memory "
                "within the 8GB limit.\n\n" + output
            )

        # Exit code 137 (128 + SIGKILL) is treated as a memory limit kill.
        # NOTE(review): GNU timeout's --kill-after SIGKILL also yields 137 under
        # --preserve-status, so a script that ignores SIGTERM at the deadline may
        # be reported here as a memory kill - confirm whether that matters.
        if result.returncode == 137:
            logger.warning("Python execution terminated due to memory limit (exit code 137)")
            return (
                "ERROR: Memory limit exceeded. The python execution was terminated because it "
                "exceeded the 8GB memory limit for tool use.\n\n" + output
            )

        return output

    # Load custom description from centralized config
    from .config.prompt_config import get_prompt_config
    prompt_config = get_prompt_config()
    tool_desc = prompt_config.get_tool_description('python')

    # Single required string parameter `code`; type/description may be overridden by config
    params = ToolParams()
    param_config = tool_desc.get('parameters', {}).get('code', {})
    params.properties['code'] = ToolParam(
        type=param_config.get('type', 'string'),
        description=param_config.get('description', 'The python code to execute.')
    )
    params.required = ['code']

    # Apply custom description
    set_tool_description(
        memory_aware_python,
        ToolDescription(
            name=tool_desc['name'],
            description=tool_desc['description'].strip(),
            parameters=params
        )
    )

    return memory_aware_python


def bash_with_custom_description(timeout: int | None = None, user: str | None = None, sandbox: str | None = None) -> Tool:
    """
    Build a bash execution tool whose model-facing description comes from the
    centralized prompt config, with memory limit detection.

    IMPORTANT: The returned tool does not go through the native Inspect bash tool;
    it calls sandbox.exec() itself with timeout_retry=False so a timed-out command
    is not retried automatically.

    Args:
        timeout: Seconds allowed for one command. When truthy, the command is also
            wrapped in GNU `timeout` inside the sandbox.
        user: User to run the command as (forwarded to sandbox exec).
        sandbox: Sandbox environment name (forwarded to inspect_ai.util.sandbox).

    Returns:
        An async callable taking `cmd` and returning the combined stderr/stdout
        text, or an explanatory message on timeout or memory failure.
    """
    # Sandbox accessor from Inspect
    from inspect_ai.util import sandbox as sandbox_env

    async def memory_aware_bash(cmd: str) -> str:
        """Execute bash command with memory limit detection and no timeout retries."""
        try:
            # With a timeout, run the command under GNU timeout in a child bash;
            # --kill-after=5 escalates to SIGKILL five seconds after the deadline.
            # Without one, the command string is passed through untouched.
            shell_line = (
                f"timeout --preserve-status --kill-after=5 {timeout} bash -c {shlex.quote(cmd)}"
                if timeout
                else cmd
            )

            completed = await sandbox_env(sandbox).exec(
                cmd=["bash", "--login", "-c", shell_line],
                timeout=timeout,
                user=user,
                timeout_retry=False  # CRITICAL: no automatic retry after a timeout
            )

            # stderr (when present) comes first, followed by stdout
            if completed.stderr:
                combined = f"{completed.stderr}\n{completed.stdout}"
            else:
                combined = f"{completed.stdout}"

            # Text markers that are taken to indicate a memory failure
            if any(marker in combined for marker in ("Cannot allocate memory", "Killed")):
                logger.warning("Bash command failed with memory error")
                return (
                    "ERROR: Memory allocation failed. The bash command failed due to insufficient memory "
                    "within the 8GB limit.\n\n" + combined
                )

            # Exit code 137 is treated as a kill by the memory limit
            if completed.returncode == 137:
                logger.warning("Bash command terminated due to memory limit (exit code 137)")
                return (
                    "ERROR: Memory limit exceeded. The bash command was terminated because it "
                    "exceeded the 8GB memory limit for tool use.\n\n" + combined
                )

            return combined

        except TimeoutError:
            # timeout_retry=False stops retries, but the first timeout still raises
            logger.warning(f"Bash command timed out: {cmd[:50]}...")
            return "Command timed out before completing."
        except Exception as exc:
            # Only exceptions whose message mentions a timeout are converted;
            # everything else propagates unchanged
            lowered = str(exc).lower()
            if "timeout" not in lowered and "timed out" not in lowered:
                raise
            logger.warning(f"Bash command timed out (wrapped): {cmd[:50]}...")
            return "Command timed out before completing."

    # Tool name/description are read from the centralized prompt config
    from .config.prompt_config import get_prompt_config
    bash_desc = get_prompt_config().get_tool_description('bash')

    # Single required string parameter `cmd`; type/description may be overridden by config
    cmd_overrides = bash_desc.get('parameters', {}).get('cmd', {})
    tool_params = ToolParams()
    tool_params.properties['cmd'] = ToolParam(
        type=cmd_overrides.get('type', 'string'),
        description=cmd_overrides.get('description', 'The bash command to execute.')
    )
    tool_params.required = ['cmd']

    # Attach the configured description to the coroutine
    description = ToolDescription(
        name=bash_desc['name'],
        description=bash_desc['description'].strip(),
        parameters=tool_params
    )
    set_tool_description(memory_aware_bash, description)

    return memory_aware_bash


# Public aliases: `python` and `bash` in this module refer to the wrapped factories
bash = bash_with_custom_description
python = python_with_custom_description



@tool
def sage_computation(timeout: int = 900) -> Tool:
    """
    Create a SageMath computation tool with configurable timeout.

    The tool writes the submitted code to a temporary .sage script inside the
    sandbox and runs it with `sage -q` through this module's bash tool, so the
    bash tool's timeout and memory limit handling apply.

    Args:
        timeout: Timeout in seconds for SageMath computations (default: 900 seconds = 15 minutes)

    Returns:
        An async callable taking `sage_code` and returning the tool output text,
        or an error message string if execution fails.
    """
    import uuid

    # Load custom description from centralized config
    from .config.prompt_config import get_prompt_config
    prompt_config = get_prompt_config()
    tool_desc = prompt_config.get_tool_description('sage_computation')

    async def execute_sage_computation(sage_code: str) -> str:
        """Execute SageMath code with automatic preparsing for natural mathematical syntax."""
        # This docstring is only a fallback: the description the model sees is
        # replaced via set_tool_description below.

        # Run through the bash tool so the real sage command executes in the
        # sandbox environment (packages installed via sage -pip are accessible)
        # and the bash tool's memory limit detection applies.
        bash_tool = bash(timeout=timeout)

        # Random name: unique per call, unlike id(), whose values can be reused
        # once an object is garbage collected.
        script_name = f"/tmp/sage_script_{uuid.uuid4().hex}.sage"

        # The quoted here-document prevents any shell expansion of the code. The
        # delimiter must not occur in the code, otherwise the here-document would
        # end early and the remainder would be executed as shell commands.
        delimiter = "SAGE_EOF"
        while delimiter in sage_code:
            delimiter = f"SAGE_EOF_{uuid.uuid4().hex}"

        # Running a script file (rather than piping to interactive sage) avoids
        # "sage:" prompts in the output.
        bash_command = f'''
# Write the sage code to a temporary file
cat > {script_name} << '{delimiter}'
{sage_code}
{delimiter}

# Execute the sage script in non-interactive mode
sage -q {script_name}
exit_code=$?

# Clean up the temporary file; sage typically leaves a preparsed
# <script>.sage.py copy beside it, so remove that too if present
rm -f {script_name} {script_name}.py

# Exit with the same code as sage
exit $exit_code
'''

        try:
            # Execute the command using the bash tool (runs in sandbox)
            result = await bash_tool(bash_command)

            # The shell reports a missing sage binary in its output text
            if "sage: command not found" in result or "sage: not found" in result:
                return "SageMath not available in the Docker environment. Please ensure SageMath is installed."

            return result

        except Exception as e:
            # Best effort: report failures of the bash call to the model as text
            return f"Error executing SageMath code: {str(e)}"

    # Single required string parameter `sage_code`; type/description may be overridden by config
    params = ToolParams()
    param_config = tool_desc.get('parameters', {}).get('sage_code', {})
    params.properties['sage_code'] = ToolParam(
        type=param_config.get('type', 'string'),
        description=param_config.get('description', 'SageMath code using natural mathematical syntax')
    )
    params.required = ['sage_code']

    # Apply custom description for model view
    set_tool_description(
        execute_sage_computation,
        ToolDescription(
            name=tool_desc['name'],
            description=tool_desc['description'].strip(),
            parameters=params
        )
    )

    return execute_sage_computation




@tool
def submit() -> Tool:
    """Create the answer submission tool, described by the centralized prompt config."""
    # Tool name/description come from the centralized prompt config
    from .config.prompt_config import get_prompt_config
    submit_desc = get_prompt_config().get_tool_description('submit')

    # Mirror the configured description onto the factory's docstring (developer documentation)
    submit.__doc__ = submit_desc['description']

    async def execute(answer: str) -> ToolResult:
        """Submit your final answer for the current question or subquestion."""
        try:
            # The current stage is read from the environment; it defaults to the main question
            stage = os.environ.get('PROOFBENCH_CURRENT_STAGE', 'main_question')

            print(f"✓ DEBUG: Submit tool called for stage: {stage}")
            print(f"  - Answer preview: {str(answer)[:100]}...")

            # Empty string means there is no further stage to move on to
            upcoming_content = _get_next_stage_and_transition(stage)

            # Response templates and answer delimiters live in the prompt config
            from .config.prompt_config import get_prompt_config
            config = get_prompt_config()
            delimiter_start, delimiter_end = config.get_answer_delimiters(stage)

            answer_fields = {
                'answer_delimiter_start': delimiter_start,
                'answer': answer,
                'answer_delimiter_end': delimiter_end,
            }

            if not upcoming_content:
                # Nothing left: every stage has been completed
                return config.get_submit_response('final_success', **answer_fields)

            # Another stage follows: acknowledge this one and present the next
            return config.get_submit_response(
                'stage_success',
                stage_name=stage,
                **answer_fields,
                next_stage_content=upcoming_content
            )

        except Exception as e:
            # Any failure above is reported through the configured error response
            from .config.prompt_config import get_prompt_config
            return get_prompt_config().get_submit_response('submission_error', error=str(e))

    # Single required string parameter `answer`; type/description may be overridden by config
    answer_overrides = submit_desc.get('parameters', {}).get('answer', {})
    tool_params = ToolParams()
    tool_params.properties['answer'] = ToolParam(
        type=answer_overrides.get('type', 'string'),
        description=answer_overrides.get('description', 'Your final answer as Markdown text with LaTeX formulas')
    )
    tool_params.required = ['answer']

    # Attach the configured description for the model's view of the tool
    model_view = ToolDescription(
        name=submit_desc['name'],
        description=submit_desc['description'].strip(),
        parameters=tool_params
    )
    set_tool_description(execute, model_view)

    # The docstring is set as well, since it is also read as the tool description
    execute.__doc__ = submit_desc['description']

    return execute


def _get_next_stage_and_transition(current_stage: str) -> str:
    """
    Return the content of the stage that follows `current_stage`.

    Args:
        current_stage: Current stage name (e.g., "main_question", "subquestion_a")

    Returns:
        Next stage content string, or empty string if no more stages
    """
    # Simplified placeholder: no stage currently has a successor, so the main
    # question and every subquestion alike report that evaluation is complete.
    # A full implementation would need subquestion info passed in through a
    # different mechanism.
    return ""


# Mathematical tools made available to AI models during evaluation.
# Only the SageMath tool is built here; the Python and Bash tools are added
# separately in evaluator.py, and submit() is handled via AgentSubmit.
# Note: A factory function is used so callers can choose the tool timeout.
def get_mathematical_tools(timeout: int = 900):
    """
    Build the list of mathematical tools using the given timeout.

    Args:
        timeout: Timeout in seconds for tool execution (default: 900 seconds = 15 minutes)

    Returns:
        A list holding the SageMath computation tool.
    """
    # SageMath for advanced mathematical computations
    sage_tool = sage_computation(timeout=timeout)

    # Note: python() and bash() are added separately from inspect_ai.tool in evaluator.py
    # Note: submit() tool is handled via AgentSubmit to avoid naming conflicts
    return [sage_tool]

# Backward-compatible module-level default: tools built with a 900 second timeout
MATHEMATICAL_TOOLS = get_mathematical_tools(timeout=900)
