import numpy as np
import torch
from torchmetrics.multimodal.clip_score import CLIPScore


class ClipScore:
    """Callable wrapper that scores one image against a text prompt with CLIP.

    The heavy lifting is delegated to ``torchmetrics``' ``CLIPScore`` metric,
    which is built once in ``__init__`` and reused for every call.
    """

    def __init__(self, model_id="openai/clip-vit-base-patch16", device="cuda"):
        """Build the underlying metric and move it to ``device``.

        Args:
            model_id: Forwarded to ``CLIPScore`` as ``model_name_or_path``.
            device: Device the metric is moved to; image tensors created in
                ``__call__`` are moved to the same device.
        """
        self.model = CLIPScore(model_name_or_path=model_id).eval().to(device)
        self.device = device

    def __call__(self, text, img):
        """Return the metric's output for ``img`` paired with ``text``.

        Args:
            text: Caption(s) passed through unchanged as the metric's second
                argument.
            img: Anything ``np.array`` can turn into an image array laid out
                as H x W x C (e.g. a PIL image or a NumPy array). Grayscale
                (H x W or H x W x 1) and RGBA (H x W x 4) inputs are
                normalised to three channels first.

        Returns:
            Whatever ``self.model(image_tensor, text)`` returns.
            NOTE(review): per the torchmetrics docs this should be a tensor
            holding the CLIP score -- confirm for the installed version.
        """
        # PIL-style images know how to convert themselves correctly (palette
        # lookup, CMYK, 1-bit, ...), so prefer that over array surgery.
        if callable(getattr(img, "convert", None)) and getattr(img, "mode", "RGB") != "RGB":
            img = img.convert("RGB")

        pixels = np.array(img)
        if pixels.ndim == 2:
            # Grayscale without a channel axis: replicate into three channels
            # so the permute below has a channel dimension to move.
            pixels = np.stack([pixels, pixels, pixels], axis=-1)
        elif pixels.ndim == 3 and pixels.shape[2] == 1:
            # Grayscale with an explicit single channel.
            pixels = np.repeat(pixels, 3, axis=2)
        elif pixels.ndim == 3 and pixels.shape[2] == 4:
            # Drop the alpha channel; only the colour channels are scored.
            pixels = pixels[..., :3]

        # H x W x C -> C x H x W, then onto the same device as the metric.
        img_tensor = torch.from_numpy(pixels).permute(2, 0, 1).to(self.device)
        return self.model(img_tensor, text)

