import typing as T

from llama_index.core.base.embeddings.base import BaseEmbedding
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

from minimal.configuration import EmbeddingDeviceType, cfg
from minimal.logger import logger


def get_embedding_model(
    name: str,
    total_chunks: int = 0,
    device: T.Optional[EmbeddingDeviceType] = None,
    use_hf_endpoint_models: bool = True,
) -> T.Tuple[BaseEmbedding | None, bool | None]:
    """
    Build a local HuggingFace embedding model for the given model name.

    This implementation always constructs a ``HuggingFaceEmbedding`` that is
    loaded locally; it does not attempt any endpoint or ONNX fallback.

    Args:
        name: Model name passed to ``HuggingFaceEmbedding`` as ``model_name``.
            An empty (falsy) value short-circuits with a warning.
        total_chunks: Accepted for interface compatibility; not used by this
            implementation.
        device: Device passed through to ``HuggingFaceEmbedding``. ``None``
            is forwarded as-is, leaving device selection to the library.
        use_hf_endpoint_models: Accepted for interface compatibility; not
            used by this implementation.

    Returns:
        A ``(model, flag)`` tuple:

        * ``(None, None)`` when ``name`` is empty.
        * ``(HuggingFaceEmbedding, False)`` otherwise. The flag is always
          ``False`` here; presumably it tells callers whether the model is
          served by a dedicated HF endpoint -- TODO confirm against callers.
    """
    if not name:
        logger.warning("No embedding model name provided.")
        return None, None

    logger.info("Getting local HF model for device '%s': %s", device, name)
    # NOTE(review): trust_remote_code=True lets the model repository run its
    # own Python code while loading. Only pass model names from trusted sources.
    model = HuggingFaceEmbedding(
        model_name=name,
        device=device,
        trust_remote_code=True,
        # Model files are cached under the project-configured HF cache path.
        cache_folder=cfg.paths.huggingface_cache.as_posix(),
    )
    return model, False
