"""
Main client class for AWS Bedrock Claude models.
"""

import json
import logging
import time
from typing import Dict, List, Any, Optional, Iterator, Union
from dataclasses import dataclass

import boto3
from botocore.exceptions import ClientError, NoCredentialsError, PartialCredentialsError

from .config import (
    BedrockConfig, 
    CLAUDE_MODELS, 
    DEFAULT_SYSTEM_PROMPTS,
    supports_thinking,
    VALID_THINKING_MODES
)
from .exceptions import (
    BedrockClaudeError,
    BedrockAPIError,
    BedrockModelError,
    BedrockAuthError,
    BedrockConfigError,
    BedrockTokenExpiredError
)
from .tools import (
    Tool,
    ToolCall,
    ToolResult,
    ToolRegistry,
    ToolExecutor
)


@dataclass
class Message:
    """
    A single turn of a conversation.

    ``BedrockClaudeClient.send_conversation`` converts each instance into a
    ``{"role": ..., "content": ...}`` dict when building the request body.
    """
    # Speaker of the turn. BedrockClaudeClient.add_to_conversation only
    # accepts the two values named in the trailing comment.
    role: str  # 'user' or 'assistant'
    # Text of the turn.
    content: str


@dataclass
class BedrockResponse:
    """
    Parsed result of one model invocation.

    Built by ``BedrockClaudeClient._invoke_model`` from the JSON body returned
    by Bedrock; the tool-related fields are filled in afterwards by the
    tool-conversation loop.
    """
    # Concatenation of the text of every 'text' content block in the reply.
    content: str
    # The 'usage' mapping from the response body ({} when it is absent).
    usage: Dict[str, int]
    # Model ID the request was sent to.
    model: str
    # 'stop_reason' from the response body, if present.
    stop_reason: Optional[str] = None
    # Text of a 'thinking' content block, when the reply contains one.
    thinking_content: Optional[str] = None  # For thinking-capable models
    # Tool calls associated with this response; set by the tool loop.
    tool_calls: Optional[List[ToolCall]] = None  # For tool use
    # Set to True by the tool loop when it stops at its iteration limit
    # while the model was still requesting tools.
    requires_tool_execution: bool = False
    # The full decoded response body, kept for callers needing extra fields.
    raw_response: Optional[Dict[str, Any]] = None


class BedrockClaudeClient:
    """
    Client for interacting with AWS Bedrock Claude models.
    
    This client provides a simple interface to interact with Claude models
    through AWS Bedrock, supporting both single prompts and conversations.
    """
    
    def __init__(self, config: Optional[BedrockConfig] = None):
        """
        Build a client from an explicit or environment-derived configuration.

        Args:
            config: Settings to use. When omitted (or falsy), the settings are
                loaded with ``BedrockConfig.from_env()``.
        """
        resolved_config = config if config else BedrockConfig.from_env()
        self.config = resolved_config
        resolved_config.validate()

        # Root logging is configured only when the configuration asks for it;
        # the module logger is created either way.
        if resolved_config.enable_logging:
            log_format = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
            logging.basicConfig(level=logging.INFO, format=log_format)
        self.logger = logging.getLogger(__name__)

        # Create the boto3 session and the two Bedrock service clients.
        self._initialize_clients()

        # Tool-use support: a registry of known tools and an executor for them.
        self.tool_registry = ToolRegistry()
        self.tool_executor = ToolExecutor()

        # Turns accumulated by add_to_conversation / continue_conversation.
        self.conversation_history: List[Message] = []
    
    def _validate_thinking_params(
        self, 
        thinking_mode: Optional[str],
        thinking_max_tokens: Optional[int],
        thinking_temperature: Optional[float]
    ) -> None:
        """
        Validate thinking parameters.
        
        Args:
            thinking_mode: The thinking mode to validate
            thinking_max_tokens: The thinking max tokens to validate  
            thinking_temperature: The thinking temperature to validate
            
        Raises:
            BedrockConfigError: If any parameter is invalid
        """
        if thinking_mode and thinking_mode not in VALID_THINKING_MODES:
            raise BedrockConfigError(f"Invalid thinking_mode: {thinking_mode}. Valid options: {list(VALID_THINKING_MODES)}")
        
        if thinking_max_tokens and thinking_max_tokens <= 0:
            raise BedrockConfigError("thinking_max_tokens must be positive")
            
        if thinking_temperature and not 0 <= thinking_temperature <= 1:
            raise BedrockConfigError("thinking_temperature must be between 0 and 1")
    
    def _add_thinking_params(
        self, 
        body: Dict[str, Any], 
        model_id: str,
        thinking_mode: Optional[str],
        thinking_max_tokens: Optional[int],
        thinking_temperature: Optional[float],
        show_thinking: Optional[bool]
    ) -> None:
        """
        Add thinking parameters to request body if model supports thinking.
        
        Args:
            body: Request body to modify
            model_id: Model identifier
            thinking_mode: Thinking mode
            thinking_max_tokens: Max tokens for thinking
            thinking_temperature: Temperature for thinking
            show_thinking: Whether to show thinking in response
        """
        # Check if any thinking parameters are provided
        if not any([thinking_mode, thinking_max_tokens, thinking_temperature, show_thinking]):
            return
            
        # Only add thinking params for models that support it
        if not supports_thinking(model_id):
            self.logger.warning(f"Model {model_id} does not support thinking parameters. They will be ignored.")
            return
        
        # Validate thinking parameters
        self._validate_thinking_params(thinking_mode, thinking_max_tokens, thinking_temperature)
        
        # Set budget_tokens with minimum 1,024 as per AWS docs
        budget_tokens = thinking_max_tokens if thinking_max_tokens else 2000
        if budget_tokens < 1024:
            budget_tokens = 1024
            self.logger.info(f"Adjusted thinking budget to minimum 1,024 tokens")
        
        # AWS Bedrock thinking format
        thinking_config = {
            "type": "enabled",
            "budget_tokens": int(budget_tokens)
        }
        
        # Add thinking configuration to request body
        body["thinking"] = thinking_config
        
        # AWS Bedrock requires specific parameters when thinking is enabled
        if "temperature" in body:
            body["temperature"] = 1.0
            self.logger.info(f"Set temperature=1 as required by AWS Bedrock when thinking is enabled")
        
        # AWS Bedrock requires top_p >= 0.95 or removed when thinking is enabled
        if "top_p" in body:
            if body["top_p"] < 0.95:
                del body["top_p"]
                self.logger.info(f"Removed top_p parameter as it was < 0.95 and thinking is enabled")
            else:
                self.logger.info(f"Kept top_p={body['top_p']} as it meets AWS Bedrock thinking requirements")
        
        self.logger.info(f"Added thinking parameters: budget_tokens={budget_tokens}, model={model_id}")

    def _initialize_clients(self) -> None:
        """
        Create the boto3 session and the two Bedrock service clients.

        Sets ``self.session``, ``self.bedrock_client`` ('bedrock-runtime',
        used for invocations) and ``self.bedrock_info_client`` ('bedrock',
        used for model metadata). It is also called again to refresh the
        session after an expired-token error.

        Raises:
            BedrockAuthError: If credentials are missing or incomplete
            BedrockClaudeError: For any other failure during setup
        """
        try:
            self.session = boto3.Session(**self.config.get_aws_session_kwargs())

            # Both service clients are pinned to the configured region.
            aws_region = self.config.aws_region
            self.bedrock_client = self.session.client('bedrock-runtime', region_name=aws_region)
            self.bedrock_info_client = self.session.client('bedrock', region_name=aws_region)

            self.logger.info(f"Initialized Bedrock client for region: {aws_region}")

        except (NoCredentialsError, PartialCredentialsError) as credentials_error:
            raise BedrockAuthError(f"AWS credentials error: {str(credentials_error)}")
        except Exception as setup_error:
            raise BedrockClaudeError(f"Failed to initialize Bedrock client: {str(setup_error)}")
    
    def send_prompt(
        self,
        prompt: str,
        model_id: Optional[str] = None,
        max_tokens: Optional[int] = None,
        temperature: Optional[float] = None,
        top_p: Optional[float] = None,
        system_prompt: Optional[str] = None,
        stop_sequences: Optional[List[str]] = None,
        thinking_mode: Optional[str] = None,
        thinking_max_tokens: Optional[int] = None,
        thinking_temperature: Optional[float] = None,
        show_thinking: Optional[bool] = False
    ) -> BedrockResponse:
        """
        Send a single prompt to Claude and get response.
        
        Args:
            prompt: The prompt to send
            model_id: Model ID to use (defaults to config default)
            max_tokens: Maximum tokens in response (defaults to config default)
            temperature: Sampling temperature (defaults to config default)
            top_p: Top-p sampling parameter (defaults to config default)
            system_prompt: System prompt (optional)
            stop_sequences: List of stop sequences (optional)
            thinking_mode: Mode for thinking process - "reasoning", "analysis", "exploration", "verification", "planning" (optional, thinking-capable models only)
            thinking_max_tokens: Maximum tokens for thinking process (optional, thinking-capable models only)
            thinking_temperature: Temperature for thinking process (optional, thinking-capable models only)
            show_thinking: Whether to include thinking process in response (optional, thinking-capable models only)
            
        Returns:
            BedrockResponse object with the response
        """
        # Use defaults from config if not provided
        model_id = model_id or self.config.default_model_id
        max_tokens = max_tokens or self.config.default_max_tokens
        temperature = temperature if temperature is not None else self.config.default_temperature
        top_p = top_p if top_p is not None else self.config.default_top_p
        
        # Prepare the request body
        messages = [{"role": "user", "content": prompt}]
        
        body = {
            "anthropic_version": "bedrock-2023-05-31",
            "max_tokens": max_tokens,
            "temperature": temperature,
            "top_p": top_p,
            "messages": messages
        }
        
        if system_prompt:
            body["system"] = system_prompt
            
        if stop_sequences:
            body["stop_sequences"] = stop_sequences
        
        # Add thinking parameters if applicable
        self._add_thinking_params(
            body, model_id, thinking_mode, thinking_max_tokens, 
            thinking_temperature, show_thinking
        )
        
        return self._invoke_model(model_id, body)
    
    def send_conversation(
        self,
        messages: List[Message],
        model_id: Optional[str] = None,
        max_tokens: Optional[int] = None,
        temperature: Optional[float] = None,
        top_p: Optional[float] = None,
        system_prompt: Optional[str] = None,
        stop_sequences: Optional[List[str]] = None
    ) -> BedrockResponse:
        """
        Send a conversation (multiple messages) to Claude.
        
        Args:
            messages: List of Message objects
            model_id: Model ID to use (defaults to config default)
            max_tokens: Maximum tokens in response (defaults to config default)
            temperature: Sampling temperature (defaults to config default)
            top_p: Top-p sampling parameter (defaults to config default)
            system_prompt: System prompt (optional)
            stop_sequences: List of stop sequences (optional)
            
        Returns:
            BedrockResponse object with the response
        """
        # Use defaults from config if not provided
        model_id = model_id or self.config.default_model_id
        max_tokens = max_tokens or self.config.default_max_tokens
        temperature = temperature if temperature is not None else self.config.default_temperature
        top_p = top_p if top_p is not None else self.config.default_top_p
        
        # Convert Message objects to API format
        api_messages = [{"role": msg.role, "content": msg.content} for msg in messages]
        
        # Prepare the request body
        body = {
            "anthropic_version": "bedrock-2023-05-31",
            "max_tokens": max_tokens,
            "temperature": temperature,
            "top_p": top_p,
            "messages": api_messages
        }
        
        if system_prompt:
            body["system"] = system_prompt
            
        if stop_sequences:
            body["stop_sequences"] = stop_sequences
        
        return self._invoke_model(model_id, body)
    
    def stream_response(
        self,
        prompt: str,
        model_id: Optional[str] = None,
        max_tokens: Optional[int] = None,
        temperature: Optional[float] = None,
        top_p: Optional[float] = None,
        system_prompt: Optional[str] = None,
        stop_sequences: Optional[List[str]] = None
    ) -> Iterator[str]:
        """
        Stream response from Claude.
        
        Args:
            prompt: The prompt to send
            model_id: Model ID to use (defaults to config default)
            max_tokens: Maximum tokens in response (defaults to config default)
            temperature: Sampling temperature (defaults to config default)
            top_p: Top-p sampling parameter (defaults to config default)
            system_prompt: System prompt (optional)
            stop_sequences: List of stop sequences (optional)
            
        Yields:
            String chunks of the response
        """
        # Use defaults from config if not provided
        model_id = model_id or self.config.default_model_id
        max_tokens = max_tokens or self.config.default_max_tokens
        temperature = temperature if temperature is not None else self.config.default_temperature
        top_p = top_p if top_p is not None else self.config.default_top_p
        
        # Prepare the request body
        messages = [{"role": "user", "content": prompt}]
        
        body = {
            "anthropic_version": "bedrock-2023-05-31",
            "max_tokens": max_tokens,
            "temperature": temperature,
            "top_p": top_p,
            "messages": messages
        }
        
        if system_prompt:
            body["system"] = system_prompt
            
        if stop_sequences:
            body["stop_sequences"] = stop_sequences
        
        try:
            response = self.bedrock_client.invoke_model_with_response_stream(
                modelId=model_id,
                body=json.dumps(body)
            )
            
            for event in response['body']:
                chunk = json.loads(event['chunk']['bytes'])
                
                if chunk['type'] == 'content_block_delta':
                    if 'delta' in chunk and 'text' in chunk['delta']:
                        yield chunk['delta']['text']
                        
        except ClientError as e:
            self._handle_client_error(e)
        except Exception as e:
            raise BedrockClaudeError(f"Streaming error: {str(e)}")
    
    def _invoke_model(self, model_id: str, body: Dict[str, Any]) -> BedrockResponse:
        """
        Invoke the Bedrock model and parse its reply, retrying on failure.

        Two independent retry budgets apply:

        * General failures are retried up to ``config.retry_attempts`` times
          with exponential backoff (``2 ** attempt`` seconds).
        * Expired credentials trigger a session refresh and an immediate
          retry, up to three times. These retries do not use up general
          attempts, so an expired token on the last general attempt is still
          retried instead of ending the loop without a result.

        Args:
            model_id: The model ID to invoke
            body: Request body; serialized to JSON for every attempt

        Returns:
            BedrockResponse object

        Raises:
            BedrockClaudeError: If token refresh retries are exhausted, or a
                non-AWS error persists after the last general attempt
            Exception: Whatever ``_handle_client_error`` raises for an AWS
                error that persists after the last general attempt
        """
        max_token_retries = 3  # Additional retries specifically for token expiration
        token_retry_count = 0
        attempt = 0  # Index of the current general attempt (0-based)

        while True:
            try:
                self.logger.info(f"Invoking model {model_id} (attempt {attempt + 1})")

                response = self.bedrock_client.invoke_model(
                    modelId=model_id,
                    body=json.dumps(body)
                )

                # Parse response
                response_body = json.loads(response['body'].read())

                # Collect text from 'text' blocks and the text of a 'thinking'
                # block; blocks without a recognised type are ignored.
                text_parts = []
                thinking_content = None
                if 'content' in response_body:
                    for content_block in response_body['content']:
                        block_type = content_block.get('type')
                        if block_type == 'text':
                            text_parts.append(content_block['text'])
                        elif block_type == 'thinking':
                            thinking_content = content_block.get('thinking', '')

                bedrock_response = BedrockResponse(
                    content="".join(text_parts),
                    usage=response_body.get('usage', {}),
                    model=model_id,
                    stop_reason=response_body.get('stop_reason'),
                    thinking_content=thinking_content,
                    raw_response=response_body
                )

                self.logger.info(f"Successfully got response from {model_id}")
                return bedrock_response

            except BedrockTokenExpiredError:
                # The session was already refreshed by whoever raised this.
                token_retry_count += 1
                if token_retry_count > max_token_retries:
                    self.logger.error(f"Max token refresh retries ({max_token_retries}) exceeded")
                    raise BedrockClaudeError(f"Token refresh failed after {max_token_retries} attempts")
                self.logger.info(f"Retrying after token refresh (token retry {token_retry_count}/{max_token_retries})")
                time.sleep(1)  # Brief pause after token refresh
                continue  # Does not consume a general attempt
            except ClientError as e:
                error_code = e.response.get('Error', {}).get('Code')
                if error_code == 'ExpiredTokenException':
                    token_retry_count += 1
                    if token_retry_count > max_token_retries:
                        self.logger.error(f"Max token refresh retries ({max_token_retries}) exceeded")
                        raise BedrockClaudeError(f"Token refresh failed after {max_token_retries} attempts")
                    self.logger.info(f"Direct token expiration, refreshing session (retry {token_retry_count}/{max_token_retries})")
                    self._initialize_clients()  # Refresh session
                    time.sleep(1)  # Brief pause after refresh
                    continue  # Does not consume a general attempt

                if attempt >= self.config.retry_attempts:
                    self._handle_client_error(e)
                    # _handle_client_error raises in every branch; re-raise
                    # defensively so this loop can never spin forever.
                    raise

                wait_time = 2 ** attempt  # Exponential backoff
                self.logger.warning(f"Retrying in {wait_time} seconds due to error: {str(e)}")
                time.sleep(wait_time)
            except Exception as e:
                if attempt >= self.config.retry_attempts:
                    raise BedrockClaudeError(f"Model invocation error: {str(e)}") from e

                wait_time = 2 ** attempt
                self.logger.warning(f"Retrying in {wait_time} seconds due to error: {str(e)}")
                time.sleep(wait_time)

            attempt += 1
    
    def _handle_client_error(self, error: ClientError) -> None:
        """Handle AWS ClientError and convert to appropriate exception."""
        error_code = error.response['Error']['Code']
        error_message = error.response['Error']['Message']
        status_code = error.response['ResponseMetadata']['HTTPStatusCode']
        
        if error_code == 'ExpiredTokenException':
            self.logger.info("AWS token expired, refreshing session...")
            self._initialize_clients()  # Refresh session
            raise BedrockTokenExpiredError(f"Token expired and refreshed: {error_message}")
        elif error_code == 'AccessDeniedException':
            raise BedrockAuthError(f"Access denied: {error_message}")
        elif error_code == 'ValidationException':
            raise BedrockModelError(f"Validation error: {error_message}")
        elif error_code == 'ResourceNotFoundException':
            raise BedrockModelError(f"Model not found: {error_message}")
        elif error_code == 'ThrottlingException':
            raise BedrockAPIError(f"Rate limit exceeded: {error_message}", error_code, status_code)
        elif error_code == 'ServiceUnavailableException':
            raise BedrockAPIError(f"Service unavailable: {error_message}", error_code, status_code)
        else:
            raise BedrockAPIError(f"API error: {error_message}", error_code, status_code)
    
    def add_to_conversation(self, role: str, content: str) -> None:
        """
        Add a message to the conversation history.
        
        Args:
            role: 'user' or 'assistant'
            content: Message content
        """
        if role not in ['user', 'assistant']:
            raise ValueError("Role must be 'user' or 'assistant'")
            
        self.conversation_history.append(Message(role=role, content=content))
    
    def clear_conversation(self) -> None:
        """Clear the conversation history."""
        self.conversation_history = []
    
    def get_conversation_history(self) -> List[Message]:
        """Get the current conversation history."""
        return self.conversation_history.copy()
    
    def continue_conversation(
        self,
        user_message: str,
        model_id: Optional[str] = None,
        max_tokens: Optional[int] = None,
        temperature: Optional[float] = None,
        top_p: Optional[float] = None,
        system_prompt: Optional[str] = None,
        stop_sequences: Optional[List[str]] = None
    ) -> BedrockResponse:
        """
        Continue the current conversation with a new user message.
        
        Args:
            user_message: New message from user
            model_id: Model ID to use (defaults to config default)
            max_tokens: Maximum tokens in response (defaults to config default)
            temperature: Sampling temperature (defaults to config default)
            top_p: Top-p sampling parameter (defaults to config default)
            system_prompt: System prompt (optional)
            stop_sequences: List of stop sequences (optional)
            
        Returns:
            BedrockResponse object with the response
        """
        # Add user message to history
        self.add_to_conversation("user", user_message)
        
        # Send conversation
        response = self.send_conversation(
            messages=self.conversation_history,
            model_id=model_id,
            max_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
            system_prompt=system_prompt,
            stop_sequences=stop_sequences
        )
        
        # Add assistant response to history
        self.add_to_conversation("assistant", response.content)
        
        return response
    
    def list_available_models(self) -> List[Dict[str, Any]]:
        """
        List available Claude models in Bedrock.
        
        Returns:
            List of model information dictionaries
        """
        try:
            response = self.bedrock_info_client.list_foundation_models(
                byProvider='anthropic'
            )
            return response.get('modelSummaries', [])
        except ClientError as e:
            self._handle_client_error(e)
        except Exception as e:
            raise BedrockClaudeError(f"Failed to list models: {str(e)}")
    
    def get_model_info(self, model_id: str) -> Dict[str, Any]:
        """
        Get information about a specific model.
        
        Args:
            model_id: The model ID to get info for
            
        Returns:
            Model information dictionary
        """
        try:
            response = self.bedrock_info_client.get_foundation_model(modelIdentifier=model_id)
            return response.get('modelDetails', {})
        except ClientError as e:
            self._handle_client_error(e)
        except Exception as e:
            raise BedrockClaudeError(f"Failed to get model info: {str(e)}")
    
    # ============================================================================
    # TOOL USE FUNCTIONALITY
    # ============================================================================
    
    def register_tool(self, tool: Tool) -> None:
        """
        Register a tool for use in conversations.
        
        Args:
            tool: Tool object to register
        """
        self.tool_registry.register(tool)
    
    def unregister_tool(self, tool_name: str) -> None:
        """
        Remove a tool from the registry.
        
        Args:
            tool_name: Name of the tool to remove
        """
        self.tool_registry.unregister(tool_name)
    
    def list_registered_tools(self) -> List[str]:
        """
        Get list of registered tool names.
        
        Returns:
            List of tool names
        """
        return self.tool_registry.list_tools()
    
    def send_prompt_with_tools(
        self,
        prompt: str,
        tools: Optional[List[str]] = None,
        tool_choice: str = "auto",
        execution_mode: str = "parallel",
        model_id: Optional[str] = None,
        max_tokens: Optional[int] = None,
        temperature: Optional[float] = None,
        top_p: Optional[float] = None,
        system_prompt: Optional[str] = None,
        stop_sequences: Optional[List[str]] = None,
        max_iterations: int = 5,
        thinking_mode: Optional[str] = None,
        thinking_max_tokens: Optional[int] = None,
        thinking_temperature: Optional[float] = None,
        show_thinking: Optional[bool] = False
    ) -> BedrockResponse:
        """
        Send a prompt with tool use capability.
        
        Args:
            prompt: The prompt to send
            tools: List of tool names to make available (None for all registered tools)
            tool_choice: "auto", "any", "none" - how Claude should use tools
            execution_mode: "serial" or "parallel" - how to execute multiple tools
            model_id: Model ID to use (defaults to config default)
            max_tokens: Maximum tokens in response (defaults to config default)
            temperature: Sampling temperature (defaults to config default)
            top_p: Top-p sampling parameter (defaults to config default)
            system_prompt: System prompt (optional)
            stop_sequences: List of stop sequences (optional)
            max_iterations: Maximum tool execution iterations
            thinking_mode: Mode for thinking process - "reasoning", "analysis", "exploration", "verification", "planning" (optional, thinking-capable models only)
            thinking_max_tokens: Maximum tokens for thinking process (optional, thinking-capable models only)
            thinking_temperature: Temperature for thinking process (optional, thinking-capable models only)
            show_thinking: Whether to include thinking process in response (optional, thinking-capable models only)
            
        Returns:
            BedrockResponse object with the final response
        """
        # Get available tools
        available_tools = self.tool_registry.get_tools_for_claude(tools)
        
        if not available_tools and tool_choice != "none":
            self.logger.warning("No tools available but tool_choice is not 'none'")
        
        # Use defaults from config if not provided
        model_id = model_id or self.config.default_model_id
        max_tokens = max_tokens or self.config.default_max_tokens
        temperature = temperature if temperature is not None else self.config.default_temperature
        top_p = top_p if top_p is not None else self.config.default_top_p
        
        # Prepare the initial request body
        messages = [{"role": "user", "content": prompt}]
        
        body = {
            "anthropic_version": "bedrock-2023-05-31",
            "max_tokens": max_tokens,
            "temperature": temperature,
            "top_p": top_p,
            "messages": messages
        }
        
        if system_prompt:
            body["system"] = system_prompt
            
        if stop_sequences:
            body["stop_sequences"] = stop_sequences
        
        # Add tools to the request if available
        if available_tools and tool_choice != "none":
            body["tools"] = available_tools
            
            # AWS Bedrock thinking + tools compatibility
            if any([thinking_mode, thinking_max_tokens, thinking_temperature, show_thinking]):
                if tool_choice not in ["auto", "none"]:
                    self.logger.warning(f"Tool choice '{tool_choice}' forces tool use and is incompatible with thinking. Using 'auto' instead.")
                    tool_choice = "auto"
                # Don't set tool_choice when thinking is enabled - let Claude decide
                # (AWS Bedrock doesn't allow forced tool use with thinking)
            else:
                # Only set explicit tool_choice when thinking is NOT enabled
                if tool_choice != "auto":
                    body["tool_choice"] = {"type": tool_choice}
        
        return self._handle_tool_conversation_iteration(
            body, model_id, execution_mode, max_iterations,
            thinking_mode, thinking_max_tokens, thinking_temperature, show_thinking
        )
    
    def _handle_tool_conversation_iteration(
        self,
        body: Dict[str, Any],
        model_id: str,
        execution_mode: str,
        max_iterations: int,
        thinking_mode: Optional[str],
        thinking_max_tokens: Optional[int],
        thinking_temperature: Optional[float],
        show_thinking: Optional[bool]
    ) -> BedrockResponse:
        """
        Handle tool conversation with multiple iterations.
        
        Args:
            body: Request body
            model_id: Model ID
            execution_mode: "serial" or "parallel"
            max_iterations: Maximum iterations
            thinking_mode: Thinking mode for each iteration
            thinking_max_tokens: Max tokens for thinking
            thinking_temperature: Temperature for thinking
            show_thinking: Whether to show thinking in response
            
        Returns:
            BedrockResponse with final result
        """
        conversation_messages = body["messages"].copy()
        iterations = 0
        
        while iterations < max_iterations:
            iterations += 1
            self.logger.info(f"Tool conversation iteration {iterations}")
            
            # Update body with current conversation state
            body["messages"] = conversation_messages
            
            # Add thinking parameters for each iteration
            self._add_thinking_params(
                body, model_id, thinking_mode, thinking_max_tokens,
                thinking_temperature, show_thinking
            )
            
            # Get response from Claude
            response = self._invoke_model(model_id, body)
            
            # Check if Claude wants to use tools
            tool_calls = self._extract_tool_calls(response.raw_response)
            
            if not tool_calls:
                # No tool calls, conversation is complete
                response.tool_calls = []
                response.requires_tool_execution = False
                return response
            
            # Execute tools
            self.logger.info(f"Executing {len(tool_calls)} tool(s) in {execution_mode} mode")
            
            # Get tools dict for execution
            tools_dict = {name: self.tool_registry.get(name) 
                         for name in self.tool_registry.list_tools()}
            
            # Execute tools based on mode
            if execution_mode == "serial":
                tool_results = self.tool_executor.execute_serial(tools_dict, tool_calls)
            else:  # parallel
                tool_results = self.tool_executor.execute_parallel(tools_dict, tool_calls)
            
            # Add assistant message with tool calls to conversation
            # AWS Bedrock requires thinking blocks to be preserved when thinking is enabled
            assistant_content = []
            
            # Include complete thinking block if it exists (required by AWS Bedrock)
            thinking_block = self._get_thinking_block_from_response(response.raw_response)
            if thinking_block:
                self.logger.info(f"Preserving thinking block: {thinking_block.get('type', 'unknown_type')}")
                assistant_content.append(thinking_block)
            else:
                self.logger.warning("No thinking block found in response - this may cause AWS Bedrock validation errors")
            
            # Add tool calls
            assistant_content.extend(self._format_tool_calls_for_conversation(tool_calls))
            
            # Debug: Log the complete assistant message structure
            self.logger.info(f"Assistant message structure: {len(assistant_content)} content blocks, types: {[block.get('type') for block in assistant_content]}")
            
            conversation_messages.append({
                "role": "assistant", 
                "content": assistant_content
            })
            
            # Add tool results to conversation
            conversation_messages.append({
                "role": "user",
                "content": self._format_tool_results_for_conversation(tool_results)
            })
        
        # Max iterations reached
        self.logger.warning(f"Tool conversation reached max iterations ({max_iterations})")
        response.tool_calls = tool_calls
        response.requires_tool_execution = True
        return response
    
    def _get_thinking_block_from_response(self, raw_response: Dict[str, Any]) -> Optional[Dict[str, Any]]:
        """
        Extract the complete thinking block from the response (required for AWS Bedrock).
        
        Args:
            raw_response: Raw response from Bedrock
            
        Returns:
            Complete thinking block or None if not found
        """
        if 'content' in raw_response:
            for content_block in raw_response['content']:
                if content_block.get('type') == 'thinking':
                    return content_block
        return None
    
    def _extract_tool_calls(self, raw_response: Dict[str, Any]) -> List[ToolCall]:
        """
        Extract tool calls from Claude's response.
        
        Args:
            raw_response: Raw response from Bedrock
            
        Returns:
            List of ToolCall objects
        """
        tool_calls = []
        
        if 'content' in raw_response:
            for content_block in raw_response['content']:
                if content_block.get('type') == 'tool_use':
                    tool_call = ToolCall.from_claude_response(content_block)
                    tool_calls.append(tool_call)
        
        return tool_calls
    
    def _format_tool_calls_for_conversation(self, tool_calls: List[ToolCall]) -> List[Dict[str, Any]]:
        """
        Format tool calls for conversation history.
        
        Args:
            tool_calls: List of tool calls
            
        Returns:
            Formatted content blocks
        """
        content_blocks = []
        
        for tool_call in tool_calls:
            content_blocks.append({
                "type": "tool_use",
                "id": tool_call.id,
                "name": tool_call.name,
                "input": tool_call.input
            })
        
        return content_blocks
    
    def _format_tool_results_for_conversation(self, tool_results: List[ToolResult]) -> List[Dict[str, Any]]:
        """
        Format tool results for conversation history.
        
        Args:
            tool_results: List of tool results
            
        Returns:
            Formatted content blocks
        """
        content_blocks = []
        
        for result in tool_results:
            content_blocks.append(result.to_claude_format())
        
        return content_blocks
    
    def execute_tools_serial(self, tool_calls: List[ToolCall]) -> List[ToolResult]:
        """
        Run the given tool calls through the executor's serial path.

        A name -> tool mapping is built from every name the registry lists,
        then handed to ``tool_executor.execute_serial`` together with the calls.

        Args:
            tool_calls: List of tool calls to execute

        Returns:
            Whatever ``execute_serial`` returns (the list of tool results)
        """
        registry = self.tool_registry

        # Snapshot of the registry: one entry per listed tool name.
        available_tools = {}
        for tool_name in registry.list_tools():
            available_tools[tool_name] = registry.get(tool_name)

        return self.tool_executor.execute_serial(available_tools, tool_calls)
    
    def execute_tools_parallel(self, tool_calls: List[ToolCall]) -> List[ToolResult]:
        """
        Run the given tool calls through the executor's parallel path.

        A name -> tool mapping is built from every name the registry lists,
        then handed to ``tool_executor.execute_parallel`` together with the calls.

        Args:
            tool_calls: List of tool calls to execute

        Returns:
            Whatever ``execute_parallel`` returns (the list of tool results)
        """
        registry = self.tool_registry

        # Pair each listed tool name with its registered tool.
        name_tool_pairs = [
            (tool_name, registry.get(tool_name))
            for tool_name in registry.list_tools()
        ]
        available_tools = dict(name_tool_pairs)

        return self.tool_executor.execute_parallel(available_tools, tool_calls)
