import os
import json
import random
import pickle
from sentence_transformers import SentenceTransformer


def generate_instruction_embedding(pickle_file_path, save_file_path):
    """Embed pickled instructions and save an embedding -> instruction map.

    The object unpickled from ``pickle_file_path`` is iterated; every item is
    encoded with the ``all-mpnet-base-v2`` sentence-transformer, the result is
    cast to ``float16`` and used (as a tuple) as the key of a dict whose value
    is the item itself. The dict is then pickled to ``save_file_path``.

    Items that produce an identical key overwrite each other, so the output
    may contain fewer entries than the input.

    Args:
        pickle_file_path: Path of the pickle file holding the instructions.
        save_file_path: Path the resulting dict is pickled to.

    Raises:
        RuntimeError: If an instruction cannot be encoded. The original
            exception is attached as ``__cause__``.
    """
    # NOTE: pickle.load can execute arbitrary code; only pass trusted files.
    with open(pickle_file_path, 'rb') as load_file:
        instructions = pickle.load(load_file)

    model = SentenceTransformer('sentence-transformers/all-mpnet-base-v2')

    output_space = {}
    for instruct in instructions:
        try:
            # NOTE(review): `[0]` keeps only the first element of the encoder
            # output; presumably each `instruct` is a sequence of sentences
            # rather than a single string -- confirm against the input data.
            embedding = model.encode(instruct)[0].astype('float16')
        except Exception as err:
            # Fail loudly instead of dropping into a debugger: continuing
            # after a failed encode would store a stale or missing embedding.
            raise RuntimeError(
                f'Failed to embed instruction: {instruct!r}'
            ) from err
        output_space[tuple(embedding.tolist())] = instruct

    with open(save_file_path, 'wb') as save_file:
        pickle.dump(output_space, save_file)

import openai
# from openai import OpenAI
import numpy as np

def openai_embedding(load_path='YOUR_INPUT_PATH', save_path='YOUR_OUTPUT_PATH',
                     model="text-embedding-ada-002"):
    """Embed the instructions of every pickle in a directory via OpenAI.

    Each file in ``load_path`` whose name does not start with ``test_`` is
    unpickled, and every value of the loaded mapping is sent to the OpenAI
    embeddings endpoint. The returned vector is cast to ``float16`` and used
    (as a tuple) as the key of a dict whose value is the instruction. One
    output pickle with the same file name is written to ``save_path`` per
    input file.

    Args:
        load_path: Directory containing the input pickle files.
        save_path: Directory the output pickle files are written to.
        model: Name of the OpenAI embedding model to request.
    """
    for file_path in os.listdir(load_path):
        if file_path.startswith('test_'):
            continue

        # NOTE: pickle.load can execute arbitrary code; only use trusted files.
        with open(os.path.join(load_path, file_path), 'rb') as load_file:
            input_space = pickle.load(load_file)

        print('Generation for ', file_path)
        # Report progress against the real number of entries in this file.
        total = len(input_space)
        output_space = {}
        for i, instruct in enumerate(input_space.values()):
            print(f'{i}/{total}', end='\r')
            response = openai.embeddings.create(input=instruct, model=model)
            embedding = np.asarray(response.data[0].embedding).astype('float16')
            output_space[tuple(embedding.tolist())] = instruct

        with open(os.path.join(save_path, file_path), 'wb') as save_file:
            pickle.dump(output_space, save_file)

def instruct_to_emb(instruct):
    """Return the ``float16`` sentence-transformer embedding of ``instruct``.

    Args:
        instruct: Input passed to ``SentenceTransformer.encode``.

    Returns:
        The first element of the encoder output, cast to ``float16``.

    Raises:
        RuntimeError: If encoding or the dtype conversion fails. The original
            exception is attached as ``__cause__``.
    """
    # NOTE: the model is constructed on every call.
    model = SentenceTransformer('sentence-transformers/all-mpnet-base-v2')
    try:
        # NOTE(review): `[0]` keeps only the first element of the encoder
        # output; presumably `instruct` is a sequence of sentences -- confirm.
        return model.encode(instruct)[0].astype('float16')
    except Exception as err:
        # Previously a bare except dropped into a debugger and then hit an
        # unbound `embedding` on return; surface the failure explicitly.
        raise RuntimeError(
            f'Failed to embed instruction: {instruct!r}'
        ) from err


if __name__ == '__main__':
    # Script entry point: replace the placeholder paths before running.
    input_pickle = 'your_input_path'
    output_pickle = 'your_output_path'
    generate_instruction_embedding(
        pickle_file_path=input_pickle,
        save_file_path=output_pickle,
    )
