# %%
from sentence_transformers import SentenceTransformer
# Smoke test: embed two toy inputs (the first one deliberately contains two
# sentences) and print whatever the encoder returns for them.
sentences = [
    "This is an example sentence. If i put 2 sentences what happens",
    "Each sentence is converted",
]

# NOTE: `model` is reused later in this file to embed the real row prompts.
model = SentenceTransformer(
    'sentence-transformers/all-MiniLM-L12-v2'
)
embeddings = model.encode(sentences)
print(embeddings)
# %%
# Getting the list of sentences from folktexts

# %%
from folktexts.prompting import encode_row_prompt
import pandas as pd
from folktexts.acs import ACSDataset
from folktexts.acs import ACSTaskMetadata


# Task / dataset configuration.
TASK_NAME = "ACSIncome"
DATA_DIR = "notebooks/data"

# Saved test-set predictions; going by the file path, these come from a
# Llama-3.2-1B-Instruct benchmark run on ACSIncome (seed 42).
llama1 = pd.read_csv(
    'folktexts-results/folktexts-results/'
    'model-Llama-3.2-1B-Instruct_task-ACSIncome/'
    'Llama-3.2-1B-Instruct_bench-75857734/'
    'ACSIncome_full_seed-42_hash-1998608642.test_predictions.csv'
)

# Task metadata, then the ACS dataset built from it (cached under DATA_DIR).
task = ACSTaskMetadata.get_task(TASK_NAME, use_numeric_qa=False)
dataset = ACSDataset.make_from_task(task=task, cache_dir=DATA_DIR)
# %%
# Align the saved Llama predictions with the ACS feature rows.
# The CSV's "Unnamed: 0" column is used as row labels into the feature table
# (presumably the dataset index that was written out with the predictions --
# TODO confirm against the benchmark output).
features1 = dataset.get_features_data()
matched_features1 = features1.loc[llama1["Unnamed: 0"]].copy()

# Index the predictions by those same labels and drop the now-redundant
# column in one non-mutating step; the index is left unnamed so it matches
# what the previous set_index(values) + drop produced.
test1 = llama1.set_index("Unnamed: 0").rename_axis(None)

# Feature columns followed by the prediction columns, row-aligned on the index.
merged_df1 = pd.concat([matched_features1, test1], axis=1)

# %%
dataset.data
# %%
merged_df1
# %%
# Render one text prompt per merged row with folktexts' prompt encoder.
# `iterrows()` is kept so every row is still handed over as a pandas Series;
# the row label itself is not needed, hence the `_`.
sentences_list = [
    encode_row_prompt(row, task)
    for _, row in merged_df1.iterrows()
]

print(f"Generated {len(sentences_list)} sentences")

# %%
# Embed every prompt with the `model` loaded earlier in this file.
embeddings = model.encode(sentences_list)
print(embeddings)
# %%
import numpy as np

# `asarray` converts only when needed, so an ndarray coming back from
# `model.encode` is not duplicated in memory.
embeddings_array = np.asarray(embeddings)

# %%
embeddings_array.shape
# %%
from pathlib import Path

# Make sure the output folder exists before writing: `np.save` does not
# create missing directories, and failing here would discard the embeddings
# computed above.
output_path = Path('deferral_experiment/sentence_embeddings_MiniLM_L12_v2.npy')
output_path.parent.mkdir(parents=True, exist_ok=True)
np.save(output_path, embeddings_array)
# %%
