import os
from time import time


def unpickle(file):
    """Read one pickled object from the file at path *file* and return it.

    The file is opened in binary mode and closed before returning.

    NOTE: unpickling can execute arbitrary code; only use this on files
    that come from a trusted source.
    """
    from pickle import Unpickler

    with open(file, 'rb') as stream:
        return Unpickler(stream).load()


def to_pickle(file, data):
    """Serialize *data* with pickle and write it to the file at path *file*.

    The file is opened in binary write mode (an existing file is
    overwritten) and the highest pickle protocol available is used.
    Returns None.
    """
    from pickle import HIGHEST_PROTOCOL, Pickler

    with open(file, 'wb') as sink:
        writer = Pickler(sink, protocol=HIGHEST_PROTOCOL)
        writer.dump(data)


def prepare_embeddings(embeddings_size=10000,
                       embeddings_dir="",
                       data_per_batch=1000):
    """Load pickled embedding batches and merge them into a single list.

    Batch files are expected to be named ``0``, ``1``, ``2``, ... inside
    *embeddings_dir*. Each file is loaded with ``unpickle`` and must contain
    a dict (presumably mapping a file name to its embedding -- confirm
    against the code that writes the batches).

    Args:
        embeddings_size: Total number of embeddings requested. Only
            ``embeddings_size // data_per_batch`` whole batch files are
            read; a trailing partial batch is ignored.
        embeddings_dir: Directory that holds the numbered batch files.
        data_per_batch: Number of embeddings assumed per batch file; used
            only to work out how many files to read. Must be positive.

    Returns:
        A list of ``(key, value)`` tuples, in file order and then in each
        dict's own iteration order.

    Raises:
        ValueError: If *data_per_batch* is not positive.

    Progress, and every entry whose value has length zero, are printed to
    stdout.
    """
    if data_per_batch <= 0:
        raise ValueError(
            f"data_per_batch must be positive, got {data_per_batch!r}")

    # Floor division avoids the float round-trip of int(a / b); the int()
    # keeps float sizes (e.g. 1e4) usable with range().
    batch_count = int(embeddings_size // data_per_batch)

    embeddings = []
    empty_count = 0
    start_time = time()
    for file_counter in range(batch_count):
        batch_path = os.path.join(embeddings_dir, str(file_counter))
        embedding_dict = unpickle(batch_path)
        elapsed_time = time() - start_time
        print(
            f"file_counter: {file_counter} len(embedding)={len(embedding_dict)} elapsed time: {elapsed_time}",
            flush=True)
        entries = embedding_dict.items()
        for entry_name, embed in entries:
            # len() rather than truthiness: array-like values may not
            # support bool().
            if len(embed) == 0:
                # The number printed is how many empty entries were seen
                # before this one.
                print(f"file_name: {entry_name} empty embedding: {empty_count}",
                      flush=True)
                empty_count += 1
        # Empty entries are reported above but still kept in the result.
        embeddings.extend(entries)
    print('len(embeddings): ', len(embeddings), flush=True)
    return embeddings
