import os
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer
import pickle

# Build a FAISS similarity index over the lines of DOC_FILE.
#
# Every non-blank line of DOC_FILE is treated as one document, embedded with
# the SentenceTransformer model named by MODEL, and added to a flat L2 index.
# Two artifacts are written into the NAME directory:
#   doc.index - the FAISS index (one vector per document)
#   docs.pkl  - the pickled list of document strings, in the same order
DOC_FILE = "diabetes.txt"
NAME = "faiss_index"
# Presumably a local model directory or a model name -- TODO confirm.
MODEL = "embedding_model_EN"
# None lets SentenceTransformer pick the device itself (a GPU when one is
# usable, otherwise the CPU), so the script no longer crashes on machines
# without CUDA. Set to "cuda" to insist on a GPU.
DEVICE = None

# Read the corpus before loading the model so that a missing or empty input
# file fails fast, without paying for the model load first.
with open(DOC_FILE, 'r', encoding='utf-8') as f:
    stripped_lines = (line.strip() for line in f)
    documents = [line for line in stripped_lines if line]

print(f"Loaded {len(documents)} documents.")

if not documents:
    # Encoding an empty list yields an array without a second axis, which
    # previously surfaced as an opaque IndexError on `embeddings.shape[1]`.
    raise ValueError(
        f"No non-empty lines found in {DOC_FILE!r}; nothing to index."
    )

embed_model = SentenceTransformer(MODEL, device=DEVICE)

# encode() runs on the device the model was loaded on, so no device argument
# is repeated here. FAISS requires C-contiguous float32 input;
# ascontiguousarray enforces that and avoids a copy when the array already
# complies.
embeddings = np.ascontiguousarray(
    embed_model.encode(
        documents,
        show_progress_bar=True,
        batch_size=128,
        convert_to_numpy=True,
    ),
    dtype=np.float32,
)

# Exact (brute-force) L2 index. Vectors are added in document order, so
# vector i in the index corresponds to documents[i] in docs.pkl.
dimension = embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(embeddings)

os.makedirs(NAME, exist_ok=True)
faiss.write_index(index, os.path.join(NAME, 'doc.index'))

# NOTE: pickle.load can execute arbitrary code, so whatever reads docs.pkl
# back should only ever load it from a trusted location.
with open(os.path.join(NAME, 'docs.pkl'), 'wb') as f:
    pickle.dump(documents, f)

