import asyncio
import random
import time
from typing import Optional


class LLMClient:
    """Template client for querying an LLM.

    Both request methods are stubs that raise ``NotImplementedError``; replace
    their bodies (or override them in a subclass) with calls to your own LLM
    client/server.
    """

    def __init__(
        self,
        llm_api_key: str,
        model: str = "llama3.3-70b-instruct",
    ):
        """
        Stores the credentials and model name for later requests.

        Args:
            llm_api_key: API key used to authenticate against the LLM service.
            model: Identifier of the model to query.
        """
        # Kept on the instance so a concrete implementation of the request
        # methods can read them when building its calls.
        self.llm_api_key = llm_api_key
        self.model = model

    async def get_llm_response(
        self, prompt: str, num_responses: int = 1, kwargs: Optional[dict] = None
    ) -> list:
        """
        Generates LLM responses to a single prompt.

        Args:
            prompt: The prompt to send to the LLM.
            num_responses: The number of responses to return.
            kwargs: Additional generation parameters.

        Returns:
            A list of num_responses LLM responses.

        Raises:
            NotImplementedError: Always, until this stub is replaced.
        """
        raise NotImplementedError(
            "Replace with your own implementation to call an LLM client server"
        )

    async def get_batch_llm_responses(
        self,
        prompts: list[str],  # Each prompt is a string
        num_responses: int = 1,
        kwargs: Optional[dict] = None,
        max_retries: int = 8,
        timeout_per_call: Optional[int] = None,
    ) -> list:
        """
        Generates LLM responses to a batch of prompts.

        Args:
            prompts: The prompts to send to the LLM, one string per prompt.
            num_responses: The number of responses to return for each prompt.
            kwargs: Additional generation parameters.
            max_retries: Retry budget for the implementation to honor
                (presumably per failed call -- confirm when implementing).
            timeout_per_call: Optional time limit for each call (presumably
                in seconds -- confirm when implementing); None leaves the
                choice to the implementation.

        Returns:
            A nested list with num_responses LLM responses for each prompt.

        Raises:
            NotImplementedError: Always, until this stub is replaced.
        """
        raise NotImplementedError(
            "Replace with your own implementation to call an LLM client server"
        )
