from sentence_transformers import SentenceTransformer
import numpy as np
import pandas as pd
from sklearn.decomposition import PCA

def encode_sentences(filename):
    """Read sentences from a headerless CSV file and embed them.

    The first column of the file is used as the list of sentences; any
    further columns are ignored.

    Args:
        filename: Path (or any other source accepted by ``pandas.read_csv``)
            of a CSV file that has no header row.

    Returns:
        A ``(embeddings, sentences)`` tuple. ``sentences`` is the list of
        strings read from the first column, and ``embeddings`` is whatever
        ``SentenceTransformer.encode`` returns for that list (with the
        library's default settings this is expected to be a NumPy array with
        one row per sentence -- confirm against the installed version).
    """
    # Parse every field as plain text. With pandas' defaults, a sentence such
    # as "NA", "null" or "123" would be converted to NaN or a number, and the
    # encoder would then receive non-string inputs.
    df = pd.read_csv(filename, header=None, dtype=str, keep_default_na=False)
    sentences = df[0].tolist()
    # NOTE: the model is loaded on every call.
    model = SentenceTransformer('all-mpnet-base-v2')
    embeddings = model.encode(sentences)
    return embeddings, sentences

def reduce_dimensionality(embeddings, n_dimensions):
    """Project embeddings onto their leading singular directions.

    Computes the thin SVD ``embeddings = U @ diag(S) @ V_T`` and returns the
    first ``n_dimensions`` columns of ``U`` scaled by the matching singular
    values. The input is not mean-centred before the decomposition.

    Args:
        embeddings: 2-D array-like of shape ``(n_samples, n_features)``.
        n_dimensions: Number of leading components to keep. Must not be
            negative. Values larger than ``min(n_samples, n_features)``
            keep every available component, as does ``None``.

    Returns:
        Array with ``n_samples`` rows and up to ``n_dimensions`` columns.
        The sign of each column follows whatever ``np.linalg.svd`` returns.

    Raises:
        ValueError: If ``n_dimensions`` is negative.
    """
    # A negative value would otherwise be treated as a "drop the last k
    # components" slice and silently return the wrong number of columns.
    if n_dimensions is not None and n_dimensions < 0:
        raise ValueError(
            f"n_dimensions must be non-negative, got {n_dimensions}"
        )

    # full_matrices=False gives the thin SVD; V_T is not needed for the
    # projected coordinates.
    U, S, _ = np.linalg.svd(embeddings, full_matrices=False)

    # Scale each kept column of U by its singular value. Broadcasting is
    # equivalent to multiplying by diag(S[:k]) without building the dense
    # k-by-k matrix.
    return U[:, :n_dimensions] * S[:n_dimensions]