import torch
from PIL import Image
from transformers import CLIPProcessor, CLIPModel
import json
from tqdm import tqdm
import jsonlines 

# Pick the compute device. The CUDA device index is only set when CUDA is
# actually available: calling torch.cuda.set_device() unconditionally fails on
# a machine without CUDA, which would make the CPU fallback unreachable.
if torch.cuda.is_available():
    torch.cuda.set_device(0)
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Load the CLIP weights onto the chosen device, and the matching processor
# from the same checkpoint directory.
model = CLIPModel.from_pretrained("/path/to/model").to(device)
processor = CLIPProcessor.from_pretrained("/path/to/model")

def compute_similarity(input_json_path, output_json_path):
    """Score image/caption pairs with CLIP and write the results as JSON Lines.

    Reads a JSON file, runs the module-level ``model`` on every record's image
    and caption, and writes one output record per input record.

    Args:
        input_json_path: Path to a UTF-8 JSON file whose top-level value is
            iterated record by record. Each record is indexed with the keys
            ``'image'``, ``'caption'`` and ``'id'``.
        output_json_path: Path of the JSON Lines file to create (opened in
            write mode, so existing content is overwritten).

    Each written record has the keys ``"id"``, ``"logits_per_image"`` (list of
    raw logits for the image) and ``"similarity_score"`` (softmax probability
    of the first caption).
    """
    with open(input_json_path, 'r', encoding='utf-8') as file:
        data = json.load(file)

    with jsonlines.open(output_json_path, mode='w') as writer:
        for item in tqdm(data, desc="Processing images", unit="image"):
            # The record's 'image' value is appended directly to the image
            # root prefix (plain string concatenation, no separator added).
            image_path = "/path/to/image" + item['image']
            caption = item['caption']
            image_id = item['id']

            # Open the image in a context manager so its file handle is
            # released as soon as the processor has built the tensors,
            # instead of lingering until garbage collection.
            with Image.open(image_path) as image:
                inputs = processor(
                    text=[caption],
                    images=image,
                    return_tensors="pt",
                    padding=True,
                    truncation=True,
                    max_length=77,
                ).to(device)

            # Inference only: no gradients needed.
            with torch.no_grad():
                outputs = model(**inputs)

            logits_per_image = outputs.logits_per_image
            # NOTE(review): assuming `caption` is a single string, only one
            # text is scored per image, so softmax over dim=1 is taken over a
            # single logit and evaluates to 1.0 for every record. The raw
            # logit is the informative value -- confirm intended metric.
            probs = logits_per_image.softmax(dim=1)
            logits_per_image_value = logits_per_image[0].tolist()
            similarity_score = probs[0][0].item()

            writer.write({"id": image_id, "logits_per_image": logits_per_image_value, "similarity_score": similarity_score})

# Script entry: source JSON file and destination JSON Lines file, then run
# the scoring pass over every record.
input_json_path, output_json_path = '/path/to/input', '/path/to/output'
compute_similarity(
    input_json_path=input_json_path,
    output_json_path=output_json_path,
)

