import logging
from time import sleep

from transformers import AutoProcessor, AutoModel

# Registry of supported vision-language models, keyed by a short alias.
# Each entry holds:
#   name       - checkpoint identifier passed to ``from_pretrained``
#   emb_dim    - embedding dimension reported to callers for this checkpoint
#   max_length - maximum length reported to callers
#                (presumably the text token limit; confirm against callers)
VLMS = {
    "clip": {
        "name": "openai/clip-vit-base-patch32",
        "emb_dim": 512,
        "max_length": 77,
    },
    "openclip": {
        "name": "laion/CLIP-ViT-B-32-laion2B-s34B-b79K",
        "emb_dim": 512,
        "max_length": 77,
    },
    "siglip": {
        "name": "google/siglip-base-patch16-224",
        "emb_dim": 768,
        "max_length": 64,
    },
    "align": {
        "name": "kakaobrain/align-base",
        "emb_dim": 640,
        "max_length": 77,
    },
}

def get_model_and_processors(vlm_key):
    """Load the pretrained model and processor registered under ``vlm_key``.

    Args:
        vlm_key: Alias of an entry in ``VLMS`` (for example ``"clip"``).

    Returns:
        A dict with four keys: ``"model"`` (result of
        ``AutoModel.from_pretrained``), ``"processor"`` (result of
        ``AutoProcessor.from_pretrained``), and the ``"max_length"`` and
        ``"emb_dim"`` values copied from the registry entry.

    Raises:
        KeyError: If ``vlm_key`` is not registered in ``VLMS``.
        Exception: Whatever ``from_pretrained`` raises. The processor load
            is retried once after a 10 second pause; a second failure
            propagates to the caller.
    """
    try:
        cfg = VLMS[vlm_key]
    except KeyError:
        # Same exception type as before, but tell the caller what is valid.
        raise KeyError(
            f"Unknown VLM key {vlm_key!r}; expected one of {sorted(VLMS)}"
        ) from None

    checkpoint = cfg["name"]
    model = AutoModel.from_pretrained(checkpoint)

    try:
        processor = AutoProcessor.from_pretrained(checkpoint, use_fast=True)
    except Exception as first_error:
        # Best-effort single retry, kept deliberately broad as in the
        # original; the first failure is logged so the 10 s stall is
        # visible instead of silent.
        logging.getLogger(__name__).warning(
            "Loading processor for %s failed (%s); retrying once in 10 s",
            checkpoint,
            first_error,
        )
        sleep(10)
        processor = AutoProcessor.from_pretrained(checkpoint, use_fast=True)

    # The processor object is returned as-is, alongside the per-model
    # settings from the registry, so callers get everything in one dict.
    return {
        "model": model,
        "processor": processor,
        "max_length": cfg["max_length"],
        "emb_dim": cfg["emb_dim"],
    }
