from PIL import Image
import torch
from transformers import CLIPProcessor, CLIPModel

# Hugging Face repo ids of the two CLIP checkpoints served by this module.
_VIT_L14_REPO = "openai/clip-vit-large-patch14"
_VIT_BIGG14_REPO = "laion/CLIP-ViT-bigG-14-laion2B-39B-b160k"

# Everything is loaded with local_files_only=True, i.e. from files already
# present on this machine. Each model is cast to float32 with .float() and
# moved to the GPU once, at import time.
model_VIT_L14 = CLIPModel.from_pretrained(_VIT_L14_REPO, local_files_only=True)
model_VIT_L14 = model_VIT_L14.float().to("cuda")
processor_VIT_L14 = CLIPProcessor.from_pretrained(
    _VIT_L14_REPO, local_files_only=True
)

model_VIT_bigG14 = CLIPModel.from_pretrained(
    _VIT_BIGG14_REPO, local_files_only=True
)
model_VIT_bigG14 = model_VIT_bigG14.float().to("cuda")
processor_VIT_bigG14 = CLIPProcessor.from_pretrained(
    _VIT_BIGG14_REPO, local_files_only=True
)


def VIT_L14(images, texts: list) -> list:
    """Score every image against every text with the CLIP ViT-L/14 model.

    Args:
        images: Image batch forwarded unchanged to ``processor_VIT_L14``.
            ``do_rescale=False`` is passed, so the processor skips its pixel
            rescaling step; presumably the pixels are already floats in
            [0, 1] -- TODO confirm with callers.
        texts: Prompts forwarded unchanged to the processor, which pads them
            to a common length (``padding=True``).

    Returns:
        ``logits_per_image`` (the image-text similarity scores) converted to
        nested Python lists. Per the CLIPModel documentation this is one row
        per image and one column per text.
    """
    inputs = processor_VIT_L14(
        text=texts,
        images=images,
        return_tensors="pt",
        padding=True,
        do_rescale=False,
    ).to('cuda')

    # Inference only: without no_grad() the forward pass keeps intermediate
    # activations for a backward pass that never happens, wasting GPU memory.
    with torch.no_grad():
        outputs = model_VIT_L14(**inputs)

    return outputs.logits_per_image.tolist()

def VIT_bigG14(images, texts: str) -> list:
    """Score every image against every text with the CLIP ViT-bigG/14 model.

    Args:
        images: Image batch forwarded unchanged to ``processor_VIT_bigG14``.
            ``do_rescale=False`` is passed, so the processor skips its pixel
            rescaling step; presumably the pixels are already floats in
            [0, 1] -- TODO confirm with callers.
        texts: Prompt(s) forwarded unchanged to the processor, which pads
            them to a common length (``padding=True``).
            NOTE(review): annotated ``str`` here while ``VIT_L14`` annotates
            the same argument as ``list``; the annotation is kept as-is for
            callers -- confirm which one is intended.

    Returns:
        ``logits_per_image`` (the image-text similarity scores) converted to
        nested Python lists. Per the CLIPModel documentation this is one row
        per image and one column per text.
    """
    inputs = processor_VIT_bigG14(
        text=texts,
        images=images,
        return_tensors="pt",
        padding=True,
        do_rescale=False,
    ).to('cuda')

    # Inference only: without no_grad() the forward pass keeps intermediate
    # activations for a backward pass that never happens, wasting GPU memory.
    with torch.no_grad():
        outputs = model_VIT_bigG14(**inputs)

    return outputs.logits_per_image.tolist()
