import os
import numpy as np
from sentence_transformers import SentenceTransformer
# install with pip install -U sentence-transformers

# Dataset variant whose labels are processed. The other variant is kept
# below for quick switching:
#DATASET_NAME = 'language_table_blocktoblock_sim'
DATASET_NAME = 'language_table_blocktoblock_4block_sim'

# NOTE(review): presumably the number of label files in the dataset — confirm.
num = 7999

# Directory holding the per-sample label files.
# NOTE(review): 'DATASET_PATH' looks like a placeholder for the real dataset
# root and probably has to be replaced before running — confirm.
root_directory = os.path.join(
    'DATASET_PATH',
    DATASET_NAME,
    "labels",
)

def decode_inst(inst):
    """Turn an array of byte values into the UTF-8 string it encodes.

    Every element equal to 0 is discarded (presumably padding; note that
    zeros anywhere in the array are dropped, not only trailing ones). The
    remaining values are packed into a ``bytes`` object and decoded as
    UTF-8.

    Args:
        inst: NumPy array of integer byte values (each in 0..255).

    Returns:
        The decoded instruction as a ``str``.
    """
    # Index with the positions of the non-zero entries to strip the zeros.
    nonzero_values = inst[np.nonzero(inst)]
    raw = bytes(nonzero_values.tolist())
    return raw.decode("utf-8")

# Load each label file "<i>.npy" for i in [0, num) from the labels directory
# and decode it into its instruction text. The module-level `num` is used
# instead of repeating the literal 7999 so the sample count is defined in
# exactly one place.
sentences = [
    decode_inst(np.load(os.path.join(root_directory, f"{i}.npy")))
    for i in range(num)
]

# Embed all instructions in one call with the pretrained Sentence-T5 base
# model (downloaded on first use by sentence-transformers).
model = SentenceTransformer('sentence-transformers/sentence-t5-base')
embeddings = model.encode(sentences)

# Store the embeddings alongside the label files as "inst.npy".
np.save(os.path.join(root_directory, "inst.npy"), embeddings)
