import os
from gensim.models import Word2Vec

def generate(X_train, vectorizer_X, parameters: dict, dataset_name: str) -> None:
    """Train a Word2Vec model on the vectorized training set and save it to disk.

    Each row of ``X_train`` is turned back into a "sentence" by mapping the
    column indices stored in that row to words via the vectorizer's
    vocabulary. Because only the stored indices are used, every word appears
    at most once per sentence and the token order follows the row's index
    order, not the word order of the original text.

    Args:
        X_train: Iterable of sparse rows, each exposing an ``indices``
            attribute (presumably a SciPy CSR matrix produced by
            ``vectorizer_X`` -- confirm against the caller).
        vectorizer_X: Fitted vectorizer exposing ``vocabulary_``, a mapping
            from word to column index.
        parameters: Word2Vec hyper-parameters. Must contain the keys
            ``vector_size``, ``window``, ``min_count``, ``workers``, ``sg``
            and ``epochs``.
        dataset_name: Name of the sub-directory of ``data`` in which the
            full model is stored.

    Raises:
        KeyError: If ``parameters`` lacks one of the required keys, or a row
            holds an index that is not in the vectorizer's vocabulary.

    Side effects:
        Writes ``data/<dataset_name>/word2vec.model`` (full gensim model) and
        ``data/word2vec.bin`` (vectors in binary word2vec format).
    """
    # Invert the vectorizer's word -> column mapping so that column indices
    # can be translated back into tokens.
    index_to_word = {
        index: word for word, index in vectorizer_X.vocabulary_.items()
    }

    # ``row.indices`` holds the column indices of the entries stored in the
    # sparse row, i.e. the vocabulary terms present in that document.
    tokenized_sentences = [
        [index_to_word[index] for index in row.indices]
        for row in X_train
    ]

    # Train the Word2Vec model
    word2vec_model = Word2Vec(
        sentences=tokenized_sentences,
        vector_size=parameters['vector_size'],
        window=parameters['window'],
        min_count=parameters['min_count'],
        workers=parameters['workers'],
        sg=parameters['sg'],
        epochs=parameters['epochs'],
    )

    # Save the trained Word2Vec model
    corpus_name = "word2vec"
    output_dir = os.path.join("data", dataset_name)
    # Make sure the target directory exists; otherwise saving fails with
    # FileNotFoundError only after the (potentially long) training has run.
    os.makedirs(output_dir, exist_ok=True)
    word2vec_model.save(os.path.join(output_dir, f"{corpus_name}.model"))

    # NOTE(review): the binary vectors are written directly under "data",
    # not under the dataset directory, so runs for different datasets
    # overwrite each other's file. Path kept unchanged because downstream
    # loaders may rely on it -- confirm whether this is intended.
    word2vec_model.wv.save_word2vec_format(
        os.path.join("data", f"{corpus_name}.bin"), binary=True
    )