from huggingface_hub import notebook_login
from huggingface_hub import login

# login(token=os.environ["HF_TOKEN"])  # read the token from the environment; never commit a real token (revoke the one previously pasted here)
# notebook_login()


import torch
from diffusers import StableDiffusionPipeline
import numpy as np
# from transformers import CLIPTextModel, CLIPTokenizer

# Checkpoint to load and the device to run it on.
model_id, device = "CompVis/stable-diffusion-v1-4", "cuda"

# Load the pipeline with half-precision weights and move it onto the device.
pipe = StableDiffusionPipeline.from_pretrained(
    model_id,
    torch_dtype=torch.float16,
).to(device)

# Only the text side of the pipeline is used by the rest of this script.
tokenizer, text_encoder = pipe.tokenizer, pipe.text_encoder

# Prompts to embed; each entry is tokenized and run through the text encoder
# by the loop below. NOTE(review): these look like ImageNet-style class
# labels (underscores instead of spaces) -- confirm the intended source.
prompts = [
    "goldfish",
    "white_shark",
    "tiger_shark",
    "hammerhead",
    "electric_ray",
    "stingray",
    "cock",
    "hen",
    "ostrich",
    "brambling",
    "goldfinch",
    "house_finch",
    "junco",
    "indigo_bunting",
    "robin",
    "bulbul",
    "jay",
    "magpie",
    "chickadee",
    "water_ouzel",
]

# Encode every prompt with the text encoder and keep one vector per prompt,
# then save the stacked result to ./embeds.pt.
token_vectors = []
with torch.no_grad():  # inference only: skip building an autograd graph
    for prompt in prompts:
        # Token ids for this prompt, placed on the same device as the model.
        inputs = tokenizer(prompt, return_tensors="pt").input_ids.to(device)
        # First element of the encoder output, indexed below as [batch][position].
        text_embeddings = text_encoder(inputs)[0]
        # Keep only position 1 of the single batch item.
        # NOTE(review): presumably position 0 is the tokenizer's start-of-text
        # token, so this is the first prompt token only; prompts that split
        # into several tokens (e.g. "white_shark") would lose the rest --
        # confirm this is intended.
        token_vectors.append(text_embeddings[0][1].cpu())

# Stack directly in torch instead of round-tripping through a NumPy array.
pt_embeds = torch.stack(token_vectors)
# Keep `embeds` as a NumPy view of the same data, as the original script left it.
embeds = pt_embeds.numpy()
print(pt_embeds.shape)

torch.save(pt_embeds, "./embeds.pt")
