from time import time

from sklearn.decomposition import TruncatedSVD
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import Normalizer

import config


def run_svd_classifier(vectorizer, x_train, yy_train, xx_test, yy_test, **kwargs):
    """Fit a TruncatedSVD + Normalizer (LSA) pipeline on ``x_train`` and report on it.

    The number of SVD components is set to the number of entries in the
    ``categories`` keyword argument. Unless ``config.is_silent`` is truthy,
    progress messages, the fit duration and the fitted SVD's
    ``explained_variance_ratio_`` (plus its sum) and ``singular_values_`` are
    printed to stdout.

    Args:
        vectorizer: Accepted for interface compatibility; not used here.
        x_train: Training feature matrix passed to the pipeline's
            ``fit_transform``.
        yy_train: Accepted for interface compatibility; not used here.
        xx_test: Accepted for interface compatibility; not used here.
        yy_test: Accepted for interface compatibility; not used here.
        **kwargs: Must contain ``categories``, a sized collection whose length
            determines ``n_components`` of the SVD. Other keys are ignored.

    Returns:
        None. The function only fits the pipeline and prints diagnostics.

    Raises:
        TypeError: If ``categories`` is missing (or ``None``).
    """
    categories = kwargs.get("categories")
    if categories is None:
        # Fail up front with an actionable message instead of the opaque
        # "object of type 'NoneType' has no len()" raised further down.
        raise TypeError(
            "run_svd_classifier() missing required keyword argument: 'categories'"
        )

    svd = TruncatedSVD(n_components=len(categories))
    normalizer = Normalizer(copy=False)
    lsa = make_pipeline(svd, normalizer)

    # Read the verbosity flag once so all output of a run is consistent.
    verbose = not config.is_silent

    if verbose:
        print("training model...")
    t0 = time()
    # The transformed training matrix is not needed afterwards; the call is
    # made for its side effect of fitting ``svd`` (and to time the full pass).
    lsa.fit_transform(x_train)
    duration = time() - t0
    if verbose:
        print("done in %.2fs" % duration)

    # NOTE: the test split (xx_test / yy_test) is not evaluated yet; only the
    # statistics of the SVD fitted on the training data are reported.
    if verbose:
        print("testing model...")
        print("svd.explained_variance_ratio_")
        print(svd.explained_variance_ratio_)
        print("svd.explained_variance_ratio_.sum")
        print(svd.explained_variance_ratio_.sum())
        print("svd.singular_values_")
        print(svd.singular_values_)
