from typing import Sequence

from src.settings.base import ExtraFieldsNotAllowedBaseModel
from src.settings.generators.chat import CustomChatGenerationSettings
from src.settings.pipelines.inference.base import InferenceExperimentSettings, SingleModelInferenceSettings
from src.settings.tf.generation import GeneratorTransformersSettings


class ChatGenerationSettings(ExtraFieldsNotAllowedBaseModel):
    """One generation configuration for chat inference.

    Groups two settings objects under a single entry: the transformers
    generation settings and the custom chat generation settings. Neither
    field declares a default.
    """

    # NOTE(review): the base class name suggests unknown keys are rejected
    # at validation time — confirm in src.settings.base.

    # Settings object of type GeneratorTransformersSettings.
    transformers_settings: GeneratorTransformersSettings
    # Settings object of type CustomChatGenerationSettings.
    custom_settings: CustomChatGenerationSettings


class ChatSingleModelInferenceSettings(SingleModelInferenceSettings):
    """Single-model inference settings extended with chat generation options.

    Adds a list of generation configurations and a group of vLLM-related
    options on top of whatever ``SingleModelInferenceSettings`` declares.
    """

    # One or more generation configurations; no default is declared.
    # NOTE(review): presumably each entry triggers its own generation run
    # for the model — confirm against the consuming pipeline.
    generation_settings: list[ChatGenerationSettings]
    # Flag for vLLM usage; off by default.
    # NOTE(review): presumably switches generation to a vLLM backend — confirm.
    use_vllm: bool = False
    # Defaults to 1.
    # NOTE(review): name matches the vLLM engine argument of the same name
    # (number of GPUs for tensor parallelism) — confirm it is forwarded as-is.
    tensor_parallel_size: int = 1
    # Defaults to 8192.
    # NOTE(review): name matches the vLLM engine argument of the same name
    # (maximum context length in tokens) — confirm it is forwarded as-is.
    max_model_len: int = 8192
    # Defaults to 0.3.
    # NOTE(review): name matches the vLLM engine argument of the same name
    # (fraction of GPU memory the engine may use); no range check is
    # declared here — confirm whether validation happens downstream.
    gpu_memory_utilization: float = 0.3


class ChatInferenceExperimentSettings(InferenceExperimentSettings):
    """Inference experiment settings specialised for chat models.

    Declares ``inference_settings`` as a sequence of
    ``ChatSingleModelInferenceSettings``.
    """

    # Sequence of per-model chat inference settings; no default is declared.
    # NOTE(review): presumably narrows a field of the same name on
    # InferenceExperimentSettings — confirm in the base class.
    inference_settings: Sequence[ChatSingleModelInferenceSettings]
