from typing import Dict, List
from tenacity import retry, stop_after_attempt, stop_after_delay, wait_exponential, retry_if_exception_type

from src.configs.provider import OpenAILLMConfig, AzureLLMConfig
from src.providers.base import BaseLLM, LLMOutput
from src.providers.utils import remove_thinking_tags
import logging
# Module-level logger shared by the retry hook and both provider classes below.
logger = logging.getLogger(__name__)
def log_attempt(retry_state):
    """
    Log the current attempt number of a tenacity retry at INFO level.

    Passed as the ``before_sleep`` callback to the ``@retry`` decorators in
    this module.

    Args:
        retry_state: Object exposing an ``attempt_number`` attribute.
    """
    attempt_number = retry_state.attempt_number
    logger.info(f"retry_state.attempt_number: {attempt_number}")
class OpenAILLM(BaseLLM):
    """
    OpenAI-compatible LLM provider.

    Supports both OpenAI's official API and any OpenAI-compatible services
    (like local models served via vLLM, ollama, etc.).
    """

    def __init__(self, config: OpenAILLMConfig):
        """
        Create the underlying OpenAI client from the given configuration.

        Args:
            config: Provider settings. ``api_key`` and ``base_url`` are passed
                to the client; the remaining fields are read on each call to
                ``generate``.
        """
        # Imported here rather than at module level, so importing this module
        # does not itself import the openai package.
        from openai import OpenAI

        self.config = config
        self.client = OpenAI(api_key=config.api_key, base_url=config.base_url)

    # NOTE(review): retry_if_exception_type(Exception) also retries
    # non-transient failures (e.g. invalid arguments or bad credentials);
    # consider narrowing it to the transient error types.
    @retry(
        stop=(stop_after_delay(300) | stop_after_attempt(20)),
        wait=wait_exponential(multiplier=2, min=4, max=30),
        retry=retry_if_exception_type(Exception),
        before_sleep=log_attempt,
    )
    def generate(self, messages: List[Dict], **kwargs) -> LLMOutput:
        """
        Generate a response using the OpenAI Chat Completions API.

        The call is wrapped in a tenacity retry: any ``Exception`` triggers a
        retry with exponential backoff (waits clamped to 4-30 seconds) until
        300 seconds have elapsed or 20 attempts have been made.

        Args:
            messages: List of message dictionaries with 'role' and 'content'
            **kwargs: Per-call overrides. ``temperature``, ``max_tokens`` and
                ``top_p`` fall back to the values in ``self.config``;
                ``extract_first_block`` (default False) and ``language_type``
                (default None) control post-processing of the response text.
                ``max_completion_tokens`` is not forwarded by this provider.

        Returns:
            The ``LLMOutput`` built from the API response, with
            ``response_txt`` post-processed as requested.

        Raises:
            tenacity.RetryError: If every attempt fails. ``reraise`` is not
                set on the decorator, so the last exception is wrapped
                rather than propagated directly.
        """
        temperature = kwargs.get("temperature", self.config.temperature)
        max_tokens = kwargs.get("max_tokens", self.config.max_tokens)
        top_p = kwargs.get("top_p", self.config.top_p)
        extract_first_block = kwargs.get("extract_first_block", False)
        language_type = kwargs.get("language_type", None)

        response = self.client.chat.completions.create(
            model=self.config.model_name_or_path,
            messages=messages,
            temperature=temperature,
            max_tokens=max_tokens,
            top_p=top_p,
        )
        logger.debug("OpenAI response: %s", response)

        llm_output: LLMOutput = LLMOutput.from_response(response)
        # Post-processing order matters: remove_thinking_tags runs first, then
        # the optional first-block extraction sees its output.
        if self.config.remove_think_prefix:
            llm_output.response_txt = remove_thinking_tags(llm_output.response_txt)
        if extract_first_block:
            llm_output.response_txt = LLMOutput.extract_first_block_from_response(
                llm_output.response_txt, language_types=[language_type]
            )
        return llm_output

    def generate_single(self, message: str, **kwargs) -> LLMOutput:
        """
        Generate a response for a single-turn prompt.

        Wraps ``message`` as the user turn after a fixed system prompt and
        delegates to ``generate``, so the same retry behaviour applies.

        Args:
            message: The user's single-turn prompt content
            **kwargs: Generation parameters forwarded unchanged to ``generate``

        Returns:
            The ``LLMOutput`` returned by ``generate``.
        """
        messages = [
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": message},
        ]
        return self.generate(messages, **kwargs)
    
    
class AzureLLM(BaseLLM):
    """
    Azure OpenAI LLM provider.

    Talks to an Azure OpenAI deployment through the ``AzureOpenAI`` client.
    Each instance owns its own client.
    """

    def __init__(self, config: AzureLLMConfig):
        """
        Create the underlying Azure OpenAI client from the given configuration.

        Args:
            config: Provider settings. ``api_key``, ``base_url`` (used as the
                Azure endpoint) and ``api_version`` are passed to the client;
                the remaining fields are read on each call to ``generate``.
        """
        # Imported here rather than at module level, so importing this module
        # does not itself import the openai package.
        from openai import AzureOpenAI

        self.config = config
        self.client = AzureOpenAI(
            api_key=config.api_key,
            azure_endpoint=config.base_url,
            api_version=config.api_version,
        )

    # NOTE(review): retry_if_exception_type(Exception) also retries
    # non-transient failures (e.g. invalid arguments or bad credentials);
    # consider narrowing it to the transient error types.
    @retry(
        stop=(stop_after_delay(300) | stop_after_attempt(20)),
        wait=wait_exponential(multiplier=2, min=4, max=30),
        retry=retry_if_exception_type(Exception),
        before_sleep=log_attempt,
    )
    def generate(self, messages: List[Dict], **kwargs) -> LLMOutput:
        """
        Generate a response using the Azure OpenAI Chat Completions API.

        The call is wrapped in a tenacity retry: any ``Exception`` triggers a
        retry with exponential backoff (waits clamped to 4-30 seconds) until
        300 seconds have elapsed or 20 attempts have been made.

        Args:
            messages: List of message dictionaries with 'role' and 'content'
            **kwargs: Per-call overrides. ``temperature``,
                ``max_completion_tokens`` and ``top_p`` fall back to the
                values in ``self.config``; ``extract_first_block`` (default
                False) and ``language_type`` (default None) control
                post-processing of the response text. ``max_tokens`` is not
                forwarded by this provider.

        Returns:
            The ``LLMOutput`` built from the API response, with
            ``response_txt`` post-processed as requested.

        Raises:
            tenacity.RetryError: If every attempt fails. ``reraise`` is not
                set on the decorator, so the last exception is wrapped
                rather than propagated directly.
        """
        temperature = kwargs.get("temperature", self.config.temperature)
        max_completion_tokens = kwargs.get(
            "max_completion_tokens", self.config.max_completion_tokens
        )
        top_p = kwargs.get("top_p", self.config.top_p)
        extract_first_block = kwargs.get("extract_first_block", False)
        language_type = kwargs.get("language_type", None)

        response = self.client.chat.completions.create(
            model=self.config.model_name_or_path,
            messages=messages,
            temperature=temperature,
            max_completion_tokens=max_completion_tokens,
            top_p=top_p,
        )
        # Lazy %-formatting: the response is only rendered when DEBUG is on.
        logger.debug("Azure LLM response: %s", response)

        llm_output: LLMOutput = LLMOutput.from_response(response)
        # Post-processing order matters: remove_thinking_tags runs first, then
        # the optional first-block extraction sees its output.
        if self.config.remove_think_prefix:
            llm_output.response_txt = remove_thinking_tags(llm_output.response_txt)
        if extract_first_block:
            llm_output.response_txt = LLMOutput.extract_first_block_from_response(
                llm_output.response_txt, language_types=[language_type]
            )

        return llm_output

    def generate_single(self, message: str, **kwargs) -> LLMOutput:
        """
        Generate a response for a single-turn prompt.

        Wraps ``message`` as the user turn after a fixed system prompt and
        delegates to ``generate``, so the same retry behaviour applies.

        Args:
            message: The user's single-turn prompt content
            **kwargs: Generation parameters forwarded unchanged to ``generate``

        Returns:
            The ``LLMOutput`` returned by ``generate``.
        """
        messages = [
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": message},
        ]
        return self.generate(messages, **kwargs)
        
    
        
    