import torch
import clip
from PIL import Image

# Build a tensor of CLIP text embeddings, one row per line of the object
# vocabulary file, and save it to 'clip_objects_tensor.pt'.

# Run on the GPU when one is available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
# NOTE(review): "../BLIP/ViT-B/32" is not a bare CLIP model name, so this is
# presumably a path to a local checkpoint -- confirm it exists relative to the
# working directory. `preprocess` is returned by clip.load but unused here.
model, preprocess = clip.load("../BLIP/ViT-B/32", device=device)

objects_file = 'utils/objects_vocab.txt'
# Use a context manager so the file handle is closed even if reading fails.
with open(objects_file, 'r') as f:
    data = f.readlines()

# Encode every vocabulary line separately and keep the results in a list.
# Stacking once at the end avoids re-copying the growing tensor on each
# iteration, which is what a vstack inside the loop would do.
text_embeddings = []
with torch.no_grad():  # inference only; no autograd graph is needed
    for line in data:
        # Every line yields exactly one row (blank lines included), so row i
        # of the saved tensor corresponds to line i of the vocabulary file.
        text = clip.tokenize('A image of: ' + line.strip()).to(device)
        text_embeddings.append(model.encode_text(text))

if not text_embeddings:
    # Fail with a clear message instead of an AttributeError on None later.
    raise ValueError(f"no vocabulary entries found in {objects_file!r}")

# assumes encode_text returns a (1, dim) tensor per prompt, so the stacked
# result is (num_lines, dim) -- TODO confirm against the printed size.
object_features_tensor = torch.vstack(text_embeddings)

print(object_features_tensor.size())
torch.save(object_features_tensor, 'clip_objects_tensor.pt')