from openai import OpenAI
from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_exponential


def _collect_streamed_message(completion_stream):
    """Fold streamed chat-completion chunks into one assistant message dict.

    Args:
        completion_stream: Iterable of chunk objects exposing ``choices``,
            where ``choices[0].delta`` may carry ``reasoning_content``,
            ``content`` and ``tool_calls`` fragments.

    Returns:
        A dict with ``role``, ``content`` and ``reasoning_content`` keys
        (both texts stripped), plus a one-element ``tool_calls`` list of
        ``{"name", "arguments"}`` when a function name was streamed.

    Raises:
        NotImplementedError: If the stream carries more than one tool call,
            either inside a single chunk or spread over several chunks.
    """
    reasoning_parts = []
    content_parts = []
    name_parts = []
    argument_parts = []
    # Index of the single tool call being assembled; None until one is seen.
    tool_call_index = None

    for chunk in completion_stream:
        if not chunk.choices:
            continue
        delta = chunk.choices[0].delta
        if not delta:
            print("Received None delta object, skipping this chunk.")
            continue

        # Missing or None fragments are normalised to the empty string.
        reasoning_parts.append(getattr(delta, 'reasoning_content', '') or '')
        content_parts.append(getattr(delta, 'content', '') or '')

        tool_calls = getattr(delta, 'tool_calls', None)
        if not tool_calls:
            continue
        if len(tool_calls) > 1:
            raise NotImplementedError(
                "Multiple tool calls are not supported in this implementation.")

        for tool_call in tool_calls:
            # Parallel tool calls usually arrive in separate chunks that
            # differ only by ``index``; without this check their names and
            # arguments would be concatenated into one corrupted call.
            index = getattr(tool_call, 'index', None)
            if index is not None:
                if tool_call_index is None:
                    tool_call_index = index
                elif index != tool_call_index:
                    raise NotImplementedError(
                        "Multiple tool calls are not supported in this implementation.")

            if tool_call.function:
                name_parts.append(
                    getattr(tool_call.function, 'name', '') or '')
                argument_parts.append(
                    getattr(tool_call.function, 'arguments', '') or '')

    response_message = {
        "role": "assistant",
        "content": "".join(content_parts).strip(),
        "reasoning_content": "".join(reasoning_parts).strip(),
    }

    function_call_name = "".join(name_parts)
    if function_call_name:
        response_message["tool_calls"] = [{
            "name": function_call_name,
            "arguments": "".join(argument_parts)
        }]

    return response_message


@retry(
    # Any Exception triggers a retry, including NotImplementedError raised
    # while aggregating a stream.
    retry=retry_if_exception_type(Exception),
    # Two attempts in total, i.e. at most one retry.
    stop=stop_after_attempt(2),
    wait=wait_exponential(multiplier=1, min=3, max=5),
    before_sleep=lambda retry_state: print(
        f"LLM call failed with {retry_state.outcome.exception()}. "
        f"(Retry {retry_state.attempt_number})"
    )
)
def create_chat_completion_with_retry(llm_client: OpenAI, model_name: str, **kwargs):
    """Call the chat-completions endpoint, retrying once on failure.

    ``model`` is set to ``model_name`` and ``max_tokens`` is always forced to
    4096, overriding any value supplied by the caller. Thinking is considered
    enabled unless ``extra_body["enable_thinking"]`` is falsy. The request is
    streamed only for model ``'qwen3-8b'`` with thinking enabled.

    Args:
        llm_client: Client whose ``chat.completions.create`` is invoked.
        model_name: Model identifier sent as the ``model`` argument.
        **kwargs: Further arguments forwarded to ``chat.completions.create``
            (e.g. ``messages``, ``tools``, ``extra_body``).

    Returns:
        Non-streaming path: ``response.choices[0].message`` as returned by
        the client. Streaming path: a plain dict with ``role``, ``content``,
        ``reasoning_content`` and optionally ``tool_calls``. Callers must
        handle both shapes.

    Raises:
        NotImplementedError: (streaming path) more than one tool call was
            streamed. Like every other exception it is subject to the retry
            policy above; with tenacity's default settings the final failure
            is presumably surfaced as ``tenacity.RetryError`` -- confirm
            against the installed tenacity version.
    """
    kwargs['model'] = model_name
    kwargs['max_tokens'] = 4096

    # ``extra_body`` may be absent or explicitly None; treat both as empty.
    extra_body = kwargs.get('extra_body') or {}
    is_thinking = extra_body.get('enable_thinking', True)
    is_stream = (model_name == 'qwen3-8b') and is_thinking

    if not is_stream:
        if not is_thinking:
            # Merge into a copy so the caller's other extra_body options (and
            # any existing chat_template_kwargs) survive and the caller's
            # dict is not mutated. The flag is sent both at the top level and
            # inside chat_template_kwargs.
            merged_body = dict(extra_body)
            merged_body["enable_thinking"] = False
            template_kwargs = dict(merged_body.get("chat_template_kwargs") or {})
            template_kwargs["enable_thinking"] = False
            merged_body["chat_template_kwargs"] = template_kwargs
            kwargs['extra_body'] = merged_body
        response = llm_client.chat.completions.create(**kwargs)
        return response.choices[0].message

    kwargs['stream'] = True
    completion_stream = llm_client.chat.completions.create(**kwargs)
    try:
        return _collect_streamed_message(completion_stream)
    finally:
        # Release the underlying connection even when aggregation raised
        # part-way through; tolerate stream objects without close().
        close_stream = getattr(completion_stream, 'close', None)
        if callable(close_stream):
            close_stream()
