import pandas as pd
from sentence_transformers import SentenceTransformer
import numpy as np
from sklearn.decomposition import PCA

# Component count for the optional PCA step; only the disabled PCA code
# further down refers to it.
n_comp = 30

# --- 1. Load every CSV split -------------------------------------------------
# Order matters only in that all six files are read before the model is
# loaded: pos train/test, amb train/test, neg train/test.
_CSV_PATHS = (
    './ruarobot/pos.train.csv',
    './ruarobot/pos.test.csv',
    './ruarobot/amb.train.csv',
    './ruarobot/amb.test.csv',
    './ruarobot/neg.train.csv',
    './ruarobot/neg.test.csv',
)
(
    pos_df,
    pos_test_df,
    amb_df,
    amb_test_df,
    neg_df,
    neg_test_df,
) = map(pd.read_csv, _CSV_PATHS)

# --- 2. Sentence encoder -----------------------------------------------------
model = SentenceTransformer("all-MiniLM-L6-v2")


def _embed(frame):
    """Return ``model.encode`` applied to the ``'text'`` column of *frame*."""
    sentences = frame['text'].to_list()
    return model.encode(sentences)


# --- 3. Encode the training splits (pos, amb, neg) ---------------------------
pos_emb, amb_emb, neg_emb = (
    _embed(train_frame) for train_frame in (pos_df, amb_df, neg_df)
)

# Disabled: optional PCA reduction fitted on the concatenated training
# embeddings. If re-enabled, the test embeddings below would additionally
# need to go through ``pca.transform(...)``.
# pca = PCA(n_components=n_comp)
# transformed_data = pca.fit_transform(np.concatenate((pos_emb, amb_emb, neg_emb)))
# pos_emb = transformed_data[:pos_emb.shape[0]]
# amb_emb = transformed_data[pos_emb.shape[0]:pos_emb.shape[0] + amb_emb.shape[0]]
# neg_emb = transformed_data[pos_emb.shape[0] + amb_emb.shape[0]:]

# --- 4. Encode the test splits (pos, amb, neg) -------------------------------
pos_test_emb, amb_test_emb, neg_test_emb = (
    _embed(test_frame) for test_frame in (pos_test_df, amb_test_df, neg_test_df)
)

# --- 5. Persist all embeddings as .npy files ---------------------------------
# Training arrays are written first, then the test arrays.
for _out_path, _matrix in (
    ('./ruarobot/pos_emb.npy', pos_emb),
    ('./ruarobot/amb_emb.npy', amb_emb),
    ('./ruarobot/neg_emb.npy', neg_emb),
    ('./ruarobot/pos_test_emb.npy', pos_test_emb),
    ('./ruarobot/amb_test_emb.npy', amb_test_emb),
    ('./ruarobot/neg_test_emb.npy', neg_test_emb),
):
    np.save(_out_path, _matrix)