import configparser
import os
import asyncio
from typing import Dict, Any, Optional, Union
from openai import OpenAI, AsyncOpenAI

# Location of config.ini, built from the directory that contains this module
# (derived from __file__), so the file is looked up next to the source file.
CONFIG_FILE_PATH = os.path.join(os.path.dirname(__file__), 'config.ini')

# Module-level cache mapping a section alias to that section's key/value pairs.
# Filled by _load_config_section so each alias is read from disk only once.
_CONFIG_CACHE: Dict[str, Dict[str, str]] = {}

def _load_config_section(alias: str) -> Dict[str, str]:
    """Return the options of one section of the config.ini file.

    Results are memoised in ``_CONFIG_CACHE``: the file is read at most once
    per alias, and later edits to the file are not picked up for an alias
    that has already been cached.

    Args:
        alias: Name of the section to look up in the file at
            ``CONFIG_FILE_PATH``.

    Returns:
        The section's options as a plain ``dict``. The same dict object is
        returned on every call for a given alias.

    Raises:
        FileNotFoundError: If there is no file at ``CONFIG_FILE_PATH``.
        OSError: If the file exists but cannot be opened or read.
        ValueError: If the file has no section named ``alias``.
    """
    if alias in _CONFIG_CACHE:
        return _CONFIG_CACHE[alias]

    config = configparser.ConfigParser()
    # Open the file ourselves instead of calling config.read(): read() silently
    # skips files it cannot open, which would make an unreadable config look
    # like a missing alias. Opening directly also avoids the gap between an
    # existence check and the actual read. No encoding is passed, matching
    # what config.read() does by default.
    try:
        with open(CONFIG_FILE_PATH) as config_file:
            config.read_file(config_file)
    except FileNotFoundError:
        raise FileNotFoundError(f"Config file not found at {CONFIG_FILE_PATH}") from None

    if alias not in config:
        raise ValueError(f"Alias '{alias}' not found in {CONFIG_FILE_PATH}.")

    section = dict(config[alias])
    _CONFIG_CACHE[alias] = section
    return section

class LLMClientWrapper:
    """Synchronous OpenAI client paired with a default model name.

    Offers the ``chat.completions.create`` call path of the wrapped client.
    When the caller does not pass ``model``, the stored ``model_name`` is
    supplied before the call is forwarded.
    """

    class ChatWrapper:
        """Stand-in for ``client.chat``; exposes only ``completions``."""

        class CompletionsWrapper:
            """Stand-in for ``client.chat.completions``."""

            def __init__(self, wrapper):
                # Back-reference to the owning LLMClientWrapper.
                self.wrapper = wrapper

            def create(self, **kwargs):
                """Forward to the wrapped client's ``chat.completions.create``.

                An explicit ``model`` keyword is passed through unchanged;
                otherwise the owning wrapper's ``model_name`` is used.
                """
                owner = self.wrapper
                kwargs.setdefault('model', owner.model_name)
                return owner.client.chat.completions.create(**kwargs)

        def __init__(self, wrapper):
            self.wrapper = wrapper
            self.completions = self.CompletionsWrapper(wrapper)

    def __init__(self, client: OpenAI, model_name: str):
        self.client = client
        self.model_name = model_name
        self.chat = self.ChatWrapper(self)

class AsyncLLMClientWrapper:
    """Asynchronous OpenAI client with a default model and optional call limit.

    Offers the ``chat.completions.create`` call path of the wrapped client.
    When ``max_concurrency`` is truthy, calls made through this wrapper are
    gated by an ``asyncio.Semaphore`` of that size; otherwise they are
    forwarded without any limit.
    """

    class ChatWrapper:
        """Stand-in for ``client.chat``; exposes only ``completions``."""

        class CompletionsWrapper:
            """Stand-in for ``client.chat.completions``."""

            def __init__(self, wrapper):
                # Back-reference to the owning AsyncLLMClientWrapper.
                self.wrapper = wrapper

            async def create(self, **kwargs):
                """Await the wrapped client's ``chat.completions.create``.

                An explicit ``model`` keyword is passed through unchanged;
                otherwise the owning wrapper's ``model_name`` is used. If the
                owner has a semaphore, it is held for the duration of the call.
                """
                owner = self.wrapper
                kwargs.setdefault('model', owner.model_name)

                limiter = owner.semaphore
                if not limiter:
                    # No concurrency limit configured: call straight through.
                    return await owner.client.chat.completions.create(**kwargs)

                async with limiter:
                    return await owner.client.chat.completions.create(**kwargs)

        def __init__(self, wrapper):
            self.wrapper = wrapper
            self.completions = self.CompletionsWrapper(wrapper)

    def __init__(self, client: AsyncOpenAI, model_name: str, max_concurrency: Optional[int] = None):
        self.client = client
        self.model_name = model_name
        # None and 0 both mean "no limit": no semaphore is created.
        if max_concurrency:
            self.semaphore = asyncio.Semaphore(max_concurrency)
        else:
            self.semaphore = None
        self.chat = self.ChatWrapper(self)

def get_client(alias: str) -> LLMClientWrapper:
    """Build a synchronous OpenAI client wrapper from a config.ini section.

    Reads ``api_key``, ``base_url`` and ``default_model`` from the section
    named ``alias``; any key that is absent is passed on as ``None``.

    Args:
        alias: The configuration alias (section name in config.ini).

    Returns:
        An ``LLMClientWrapper`` around a new ``OpenAI`` client, using the
        section's ``default_model`` as its model name.

    Raises:
        FileNotFoundError, ValueError: Propagated from
            ``_load_config_section`` when the file or section is missing.
    """
    settings = _load_config_section(alias)

    configured_key = settings.get("api_key")
    # An api_key of 'ollama' (any letter case) is swapped for 'sk-ollama'.
    # NOTE(review): presumably a placeholder key for local Ollama servers — confirm.
    is_ollama = configured_key and configured_key.lower() == 'ollama'
    api_key = 'sk-ollama' if is_ollama else configured_key

    return LLMClientWrapper(
        OpenAI(api_key=api_key, base_url=settings.get("base_url")),
        settings.get("default_model"),
    )

def get_async_client(alias: str, max_concurrency: Optional[int] = None) -> AsyncLLMClientWrapper:
    """Build an asynchronous OpenAI client wrapper from a config.ini section.

    Reads ``api_key``, ``base_url`` and ``default_model`` from the section
    named ``alias``; any key that is absent is passed on as ``None``.

    Args:
        alias: The configuration alias (section name in config.ini).
        max_concurrency: Optional integer to limit the number of concurrent
            API calls made through the returned wrapper. Useful for
            preventing overload on local LLM servers (e.g., sglang).

    Returns:
        An ``AsyncLLMClientWrapper`` around a new ``AsyncOpenAI`` client,
        using the section's ``default_model`` as its model name.

    Raises:
        FileNotFoundError, ValueError: Propagated from
            ``_load_config_section`` when the file or section is missing.
    """
    settings = _load_config_section(alias)

    configured_key = settings.get("api_key")
    # An api_key of 'ollama' (any letter case) is swapped for 'sk-ollama'.
    # NOTE(review): presumably a placeholder key for local Ollama servers — confirm.
    is_ollama = configured_key and configured_key.lower() == 'ollama'
    api_key = 'sk-ollama' if is_ollama else configured_key

    return AsyncLLMClientWrapper(
        AsyncOpenAI(api_key=api_key, base_url=settings.get("base_url")),
        settings.get("default_model"),
        max_concurrency=max_concurrency,
    )
