import torch
from PIL import Image

from open_clip import load_YouCLIP

# Demo: score one image against a few candidate captions with a YouCLIP
# checkpoint. The checkpoint path below is a placeholder; point it at the
# actual weights file before running.
model, preprocess, tokenizer = load_YouCLIP(
    model_name='YouCLIP-Base',
    model_file_path='/xxx/xx/xx/You-Base.pt',
)

# Open the image inside a context manager so the underlying file handle is
# released deterministically instead of whenever Pillow/GC gets around to it.
# unsqueeze(0) adds a leading dimension of size 1 (presumably the batch axis
# expected by encode_image -- confirm against the model's API).
with Image.open("assets/demo.png") as demo_image:
    image = preprocess(demo_image).unsqueeze(0)

# Candidate captions, in Chinese: "a pig", "a dog", "a cat".
text = tokenizer(["一只猪", "一个狗", "一只猫"])

# Inference only, so gradient tracking is disabled. torch.autocast with
# device_type="cuda" is the non-deprecated spelling of torch.cuda.amp.autocast().
with torch.no_grad(), torch.autocast(device_type="cuda"):
    image_features = model.encode_image(image)
    text_features = model.encode_text(text)

    # Scale each embedding to unit L2 norm along the last dimension so the
    # matrix product below yields cosine similarities.
    image_features /= image_features.norm(dim=-1, keepdim=True)
    text_features /= text_features.norm(dim=-1, keepdim=True)

    # Multiply similarities by 100 before the softmax to sharpen the
    # distribution, then normalise over the captions (last dimension).
    text_probs = (100.0 * image_features @ text_features.T).softmax(dim=-1)

# One row per image, one probability per caption, in the order given above.
print(text_probs)
