import pandas as pd
from sentence_transformers import SentenceTransformer
import numpy as np
from sklearn.decomposition import PCA

# Load the expert-labelled queries and split them into three groups by the
# 'query-label-expert' column:
#   label == 0 -> negatives (appended to the lines of medicheck-neg.csv)
#   label >= 2 -> serious
#   otherwise  -> non-serious (what remains in `medi_df`)
medi_df = pd.read_csv('./medical/medicheck-expert.csv')

# The negatives file is read as one query per line. Strip the line terminator
# and surrounding whitespace and skip blank lines, so that empty strings are
# not embedded as negative examples. The encoding is explicit so the result
# does not depend on the platform's locale default.
# NOTE(review): if this file has a CSV header row it is still treated as a
# query, exactly as before -- confirm against the data.
with open('./medical/medicheck-neg.csv', encoding='utf-8') as f:
    neg_list = [line.strip() for line in f if line.strip()]

# Boolean masks select the same rows as the previous drop-by-index approach
# on the default RangeIndex, without relying on index labels being unique.
# NOTE(review): rows whose label is NaN match neither mask and therefore stay
# in the non-serious group (unchanged behaviour) -- confirm this is intended.
expert_label = medi_df['query-label-expert']
is_negative = expert_label == 0
is_serious = expert_label >= 2

medi_df_neg = medi_df[is_negative]
medi_df_serious = medi_df[is_serious]

# File-based negatives come first, followed by the expert-labelled negatives.
neg_list = neg_list + medi_df_neg['query'].to_list()

# Whatever is neither negative nor serious is the non-serious set.
medi_df = medi_df[~(is_negative | is_serious)]

# Sentence-embedding model used for all three query groups.
model = SentenceTransformer("all-MiniLM-L6-v2")

# Pull the query text out of each DataFrame before encoding.
non_serious_queries = medi_df['query'].to_list()
serious_queries = medi_df_serious['query'].to_list()

# Encode each group separately: negatives, non-serious, then serious.
neg_emb = model.encode(neg_list)
non_ser_emb = model.encode(non_serious_queries)
ser_emb = model.encode(serious_queries)

# Print the shape of each embedding array (negatives, non-serious, serious)
# on a single line as a quick sanity check.
print(*(emb.shape for emb in (neg_emb, non_ser_emb, ser_emb)))

# Write each embedding array to its own .npy file.
for out_path, emb in (
    ('./medical/neg_emb.npy', neg_emb),
    ('./medical/non_serious_emb.npy', non_ser_emb),
    ('./medical/serious_emb.npy', ser_emb),
):
    np.save(out_path, emb)
