import os
from gensim.models import FastText

def generate(X_train, vectorizer_X, parameters, dataset_name):
    """Train a FastText model on the vectorized training set and save it to disk.

    Each row of ``X_train`` is turned back into a list of tokens by mapping
    the row's ``indices`` through the inverse of ``vectorizer_X.vocabulary_``.
    The resulting token lists are used as the training sentences.

    Two files are written under ``data/<dataset_name>/`` (the directory is
    created if it does not exist yet):

    * ``fasttext.model`` - the full model, via ``FastText.save``.
    * ``fasttext.bin`` - the word vectors in binary word2vec format.

    Args:
        X_train: Iterable of rows, each exposing an ``indices`` attribute
            (the non-zero column indices of a sparse matrix row).
        vectorizer_X: Fitted vectorizer exposing ``vocabulary_``, a mapping
            from word to column index.
        parameters: Mapping providing the FastText hyper-parameters under the
            keys ``'vector_size'``, ``'window'``, ``'min_count'``,
            ``'workers'``, ``'sg'`` and ``'epochs'``.
        dataset_name: Name of the sub-directory of ``data`` that receives the
            saved files.

    Raises:
        KeyError: If ``parameters`` lacks one of the required keys, or a row
            references an index that is absent from the vocabulary.
    """
    # Invert the vectorizer's word -> index mapping so that column indices
    # can be translated back into tokens.
    index_to_word = {
        index: word for word, index in vectorizer_X.vocabulary_.items()
    }

    # NOTE(review): tokens appear in the order given by ``row.indices`` and
    # presumably once per distinct word, so the original word order and
    # repetitions of the source text are not recovered here - confirm that
    # this is acceptable for the chosen ``window``.
    tokenized_sentences = [
        [index_to_word[index] for index in row.indices]
        for row in X_train
    ]

    # Train FastText model
    fasttext_model = FastText(
        sentences=tokenized_sentences,
        vector_size=parameters['vector_size'],
        window=parameters['window'],
        min_count=parameters['min_count'],
        workers=parameters['workers'],
        sg=parameters['sg'],
        epochs=parameters['epochs']
    )

    corpus_name = "fasttext"
    output_dir = os.path.join("data", dataset_name)
    # Make sure the target directory exists; otherwise the save calls below
    # fail only after the (potentially long) training has already finished.
    os.makedirs(output_dir, exist_ok=True)

    model_path = os.path.join(output_dir, f"{corpus_name}.model")
    fasttext_model.save(model_path)
    model_path_bin = os.path.join(output_dir, f"{corpus_name}.bin")
    fasttext_model.wv.save_word2vec_format(model_path_bin, binary=True)