import asyncio
import logging
import time
from pathlib import Path
from traceback import format_exc

from together import AsyncTogether
from together.error import InvalidRequestError, ServiceUnavailableError

from safetytooling.data_models import LLMResponse, Prompt

from .model import InferenceAPIModel

TOGETHER_MODELS = {
    # Meta/Llama models
    "meta-llama/Llama-2-13b-chat-hf",
    "meta-llama/Llama-3.2-1B-Instruct",
    "meta-llama/Llama-3.2-3B-Instruct-Turbo",
    "meta-llama/Llama-3.3-70B-Instruct-Reference",
    "meta-llama/Llama-3.3-70B-Instruct-Turbo",
    "meta-llama/Llama-4-Scout-17B-16E-Instruct",
    "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
    "meta-llama/Meta-Llama-3-8B-Instruct",
    "meta-llama/Meta-Llama-3-8B-Instruct-Turbo",
    "meta-llama/Meta-Llama-3-70B-Instruct-Turbo",
    "meta-llama/Meta-Llama-3.1-8B-Instruct-Reference",
    "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
    "meta-llama/Meta-Llama-3.1-70B-Instruct-Reference",
    "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo",
    "meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo",
    # Mistral models
    "mistralai/Mistral-7B-v0.1",
    "mistralai/Mixtral-8x7B-Instruct-v0.1",
    "mistralai/Mixtral-8x22B-Instruct-v0.1",
    # Google models
    "google/gemma-2-9b-it",
    "google/gemma-2-27b-it",
    # DeepSeek models
    "deepseek-ai/DeepSeek-R1",
    "deepseek-ai/DeepSeek-V3",
    "deepseek-ai/deepseek-llm-67b-chat",
    # Qwen models
    "Qwen/Qwen2.5-Coder-32B-Instruct",
    "Qwen/Qwen2.5-72B-Instruct-Turbo",
    "Qwen/QwQ-32B-Preview",
    # OpenAI models
    "gpt-4.5-preview-2025-02-27",
    # Anthropic models
    "claude-3-7-sonnet-20250219",
    # NVIDIA models
    "nvidia/Llama-3.1-Nemotron-70B-Instruct-HF",
}
LOGGER = logging.getLogger(__name__)


class TogetherChatModel(InferenceAPIModel):
    def __init__(
        self,
        num_threads: int,
        prompt_history_dir: Path | None = None,
        api_key: str | None = None,
    ):
        self.num_threads = num_threads
        self.prompt_history_dir = prompt_history_dir
        if api_key is not None:
            self.aclient = AsyncTogether(api_key=api_key)
        else:
            self.aclient = None
        self.available_requests = asyncio.BoundedSemaphore(int(self.num_threads))

    @staticmethod
    def convert_top_logprobs(data) -> list[dict]:
        # convert TogetherAI chat version of logprobs response to match OpenAI completion version
        # note that TogetherAI only supports one logprob per token

        top_logprobs = []

        for token, logprob in zip(data.tokens, data.token_logprobs):
            top_logprobs.append({token: logprob})

        return top_logprobs

    async def __call__(
        self,
        model_id: str,
        prompt: Prompt,
        print_prompt_and_response: bool,
        max_attempts: int,
        is_valid=lambda x: True,
        **kwargs,
    ) -> list[LLMResponse]:
        if self.aclient is None:
            raise RuntimeError("TOGETHER_API_KEY environment variable must be set in .env before running your script")
        start = time.time()

        prompt_file = self.create_prompt_history_file(prompt, model_id, self.prompt_history_dir)

        if "logprobs" in kwargs:
            kwargs["top_logprobs"] = kwargs["logprobs"]
            kwargs["logprobs"] = True

        LOGGER.debug(f"Making {model_id} call")
        response_data = None
        duration = None

        error_list = []
        for i in range(max_attempts):
            try:
                async with self.available_requests:
                    api_start = time.time()

                    response_data = await self.aclient.chat.completions.create(
                        messages=prompt.together_format(),
                        model=model_id,
                        **kwargs,
                    )
                    api_duration = time.time() - api_start
                    if not is_valid(response_data):
                        raise RuntimeError(f"Invalid response according to is_valid {response_data}")
            except (TypeError, InvalidRequestError) as e:
                raise e
            except ServiceUnavailableError:
                LOGGER.warn(f"Service Unavailable or Rate Limited for {model_id}")
                await asyncio.sleep(10)
            except Exception as e:
                error_info = f"Exception Type: {type(e).__name__}, Error Details: {str(e)}, Traceback: {format_exc()}"
                LOGGER.warn(f"Encountered API error: {error_info}.\nRetrying now. (Attempt {i})")
                error_list.append(error_info)
                api_duration = time.time() - api_start
                await asyncio.sleep(1.5**i)
            else:
                break

        duration = time.time() - start
        LOGGER.debug(f"Completed call to {model_id} in {duration}s")

        if response_data is None:
            raise RuntimeError("No response data received")

        assert len(response_data.choices) == kwargs.get(
            "n", 1
        ), f"Expected {kwargs.get('n', 1)} choices, got {len(response_data.choices)}"

        responses = [
            LLMResponse(
                model_id=model_id,
                completion=choice.message.content,
                stop_reason=choice.finish_reason,
                api_duration=api_duration,
                duration=duration,
                cost=0,
                logprobs=self.convert_top_logprobs(choice.logprobs) if choice.logprobs is not None else None,
            )
            for choice in response_data.choices
        ]

        self.add_response_to_prompt_file(prompt_file, responses)
        if print_prompt_and_response:
            prompt.pretty_print(responses)

        return responses
