import os
import shutil
import subprocess

# Shell variables assigned in glove/demo.sh, paired with the ``parameters``
# key whose value replaces each assignment.
_DEMO_ASSIGNMENTS = (
    ("VECTOR_SIZE", "vector_size"),
    ("MAX_ITER", "max_iter"),
    ("WINDOW_SIZE", "window_size"),
)


def _write_corpus(X_train, vectorizer_X, output_file):
    """Write one space-separated line of vocabulary words per row of X_train."""
    reverse_vocab = {index: word for word, index in vectorizer_X.vocabulary_.items()}
    with open(output_file, "w", encoding="utf-8") as f:
        # Stream row by row so the whole corpus is never held in memory.
        for row in X_train:
            f.write(" ".join(reverse_vocab[index] for index in row.indices) + "\n")


def _remove_path(path):
    """Delete ``path`` whether it is a file, a symlink or a directory tree."""
    if os.path.isdir(path) and not os.path.islink(path):
        shutil.rmtree(path)
    else:
        os.remove(path)


def _patch_demo_script(demo_path, parameters):
    """Rewrite the assignments listed in _DEMO_ASSIGNMENTS inside demo.sh."""
    with open(demo_path, "r", encoding="utf-8") as f:
        lines = f.readlines()

    for i, line in enumerate(lines):
        stripped = line.lstrip()
        indent = line[:len(line) - len(stripped)]
        for shell_var, key in _DEMO_ASSIGNMENTS:
            # Only touch lines that *begin* with the assignment; a substring
            # test would also clobber echo/comment/export lines that merely
            # mention "NAME=".
            if stripped.startswith(shell_var + "="):
                lines[i] = f"{indent}{shell_var}={parameters[key]}\n"
                break

    with open(demo_path, "w", encoding="utf-8") as f:
        f.writelines(lines)


def generate(X_train, vectorizer_X, parameters, dataset_name):
    """Build a corpus from X_train, run the GloVe demo on it and store the vectors.

    Steps, all relative to the current working directory:

    1. Write one line per row of ``X_train`` to
       ``custom_glove_text8_sentences.txt``; each line holds the vocabulary
       words for that row's ``indices``, in the order ``indices`` lists them.
    2. Clone https://github.com/stanfordnlp/glove into ``glove`` unless that
       path already exists.
    3. Move the corpus to ``glove/text8``, replacing anything already there.
    4. Overwrite the ``VECTOR_SIZE``, ``MAX_ITER`` and ``WINDOW_SIZE``
       assignments in ``glove/demo.sh`` with values from ``parameters``.
    5. Run ``make`` and then ``bash demo.sh`` inside ``glove``.
    6. Move ``glove/vectors.txt`` to ``data/<dataset_name>/glove.model``
       (creating the directory if needed) and delete the ``glove`` checkout.

    If ``glove/demo.sh`` is missing, a message is printed and the function
    returns after step 3 without building, training or cleaning up.

    Args:
        X_train: Iterable of rows, each exposing an ``indices`` attribute
            (e.g. the rows of a SciPy CSR matrix).
        vectorizer_X: Object with a ``vocabulary_`` mapping of word -> index.
        parameters: Mapping providing ``vector_size``, ``max_iter`` and
            ``window_size``; a key is only read when demo.sh contains the
            corresponding assignment.
        dataset_name: Sub-directory of ``data`` that receives ``glove.model``.

    Raises:
        subprocess.CalledProcessError: If git, make or demo.sh exits non-zero.
        KeyError: If a row index is absent from the vocabulary, or a needed
            key is absent from ``parameters``.
    """
    output_file = "custom_glove_text8_sentences.txt"
    _write_corpus(X_train, vectorizer_X, output_file)
    print(f"Text8-like file with sentences saved to {output_file}")

    # 1. Clone GloVe if not present
    if not os.path.exists("glove"):
        subprocess.run(["git", "clone", "https://github.com/stanfordnlp/glove"], check=True)
    else:
        print("GloVe repository already exists.")

    # 2. Move corpus to glove/text8
    text8_path = os.path.join("glove", "text8")
    if os.path.lexists(text8_path):
        print("glove/text8 already exists. Removing it.")
        _remove_path(text8_path)
    else:
        print("glove/text8 does not exist.")
    shutil.move(output_file, text8_path)

    # 3. Edit demo.sh with parameters
    demo_path = os.path.join("glove", "demo.sh")
    if not os.path.exists(demo_path):
        print("demo.sh not found in glove directory.")
        return
    _patch_demo_script(demo_path, parameters)

    # 4. Build GloVe
    subprocess.run(["make"], cwd="glove", check=True)

    # 5. Run demo.sh
    subprocess.run(["bash", "demo.sh"], cwd="glove", check=True)

    # 6. Store the resulting vectors under data/<dataset_name>/
    glove_file = os.path.join(os.getcwd(), "data", dataset_name, "glove.model")
    print(f"Moving glove model to {glove_file}")
    # Create the target directory first so the move cannot fail on a missing
    # folder after make and demo.sh have already run.
    os.makedirs(os.path.dirname(glove_file), exist_ok=True)
    shutil.move(os.path.join("glove", "vectors.txt"), glove_file)

    remove_cloned_glove()

def remove_cloned_glove():
    """Delete the ``glove`` path from the current working directory.

    A real directory is removed recursively; a plain file or a symlink is
    unlinked without following it. Nothing happens (beyond the log line) when
    ``glove`` does not exist.

    Raises:
        OSError: If the path exists but cannot be removed.
    """
    print("Removing glove directory")
    if os.path.isdir("glove") and not os.path.islink("glove"):
        shutil.rmtree("glove")
    elif os.path.lexists("glove"):
        os.remove("glove")
    