import numpy as np
import csv, os, string
import gensim
from gensim.corpora.dictionary import Dictionary
from gensim.models import LdaModel

def _read_tokenized_documents(path, tok):
    """Return the tokenized first column of every data row in the CSV at *path*.

    The first row is treated as a header and skipped; empty rows are ignored.
    """
    # newline='' lets the csv module do its own line-ending handling, so
    # quoted fields containing line breaks are parsed as a single field.
    with open(path, newline='') as csv_file:
        csv_reader = csv.reader(csv_file, delimiter=',')
        next(csv_reader, None)  # skip the header
        return [tok(row[0]) for row in csv_reader if row]


def _dense_topic_matrix(lda, corpus, n_topics):
    """Return a ``len(corpus) x n_topics`` array of per-document topic weights."""
    X = np.zeros([len(corpus), n_topics])
    for i, bow in enumerate(corpus):
        # Only the topics returned by the model lookup are written; any
        # topic the lookup omits keeps its initial weight of 0.
        for top, prob in lda[bow]:
            X[i, top] = prob
    return X


def build_lda_representation(data_path, results_folder, n_topics, passes, tok=str.split):
    """Train an LDA model and save dense topic features for train and test sets.

    Reads ``train.csv``, ``test.csv`` and ``unsupervised.csv`` from
    *data_path* (header row skipped, text taken from the first column),
    builds one dictionary over all three corpora, fits an LDA model on the
    unsupervised corpus only, and writes the topic weights of the train and
    test documents to ``train.npy`` and ``test.npy`` in *results_folder*.

    Args:
        data_path: Directory containing the three CSV files.
        results_folder: Directory the ``.npy`` files are written to; it is
            created if it does not exist.
        n_topics: Number of LDA topics, and the number of columns of the
            saved matrices.
        passes: Number of training passes handed to ``LdaModel``.
        tok: Callable turning one document string into a list of tokens.

    Returns:
        None. The results are the two files written to *results_folder*.
    """
    print("reading files...")
    train_docs, test_docs, unsup_docs = (
        _read_tokenized_documents(os.path.join(data_path, name), tok)
        for name in ('train.csv', 'test.csv', 'unsupervised.csv')
    )

    # Create dictionary with all of the corpus.
    print("creating dictionary...")
    corpus_dict = Dictionary(train_docs + test_docs + unsup_docs)
    unsup_corpus = [corpus_dict.doc2bow(s) for s in unsup_docs]

    # Create model on unsupervised data.
    # id2word ties the model's vocabulary to the full dictionary; without it
    # the vocabulary size is inferred from the unsupervised corpus alone and
    # train/test documents could reference word ids the model does not know.
    print("creating LDA model on unsupervised corpus...")
    lda = LdaModel(
        unsup_corpus,
        num_topics=n_topics,
        id2word=corpus_dict,
        update_every=2,
        alpha='auto',
        eta='auto',
        iterations=100,
        passes=passes,
    )

    train_corpus = [corpus_dict.doc2bow(s) for s in train_docs]
    test_corpus = [corpus_dict.doc2bow(s) for s in test_docs]

    print("creating dense training representation...")
    X_train = _dense_topic_matrix(lda, train_corpus, n_topics)

    print("creating dense testing representation...")
    X_test = _dense_topic_matrix(lda, test_corpus, n_topics)

    # Make sure the output directory exists before saving; an empty string
    # means the current directory, which os.makedirs would reject.
    if results_folder:
        os.makedirs(results_folder, exist_ok=True)

    test_name = os.path.join(results_folder, "test.npy")
    train_name = os.path.join(results_folder, "train.npy")

    np.save(test_name, X_test)
    np.save(train_name, X_train)

    return