import setup

from lale.grammar import Grammar
from lale.lib.lale import Hyperopt
from lale.pretty_print import to_string
from sklearn.metrics import accuracy_score

from lale.lib.sklearn import SimpleImputer as Imputer
from lale.lib.sklearn import MissingIndicator
from lale.lib.sklearn import PCA
from lale.lib.sklearn import GaussianNB as GaussNB
from lale.lib.sklearn import LinearSVC
from lale.lib.sklearn import RidgeClassifier
from lale.lib.sklearn import SGDClassifier
from lale.lib.sklearn import OneHotEncoder
from lale.lib.sklearn import OrdinalEncoder

# Grammar describing the space of candidate pipelines. Rules are assigned as
# attributes of `g`; a rule may reference a symbol (e.g. `g.est`) before the
# line that defines it.
g = Grammar()

# In Python `>>` binds tighter than `|`, so every `|`-separated alternative
# below is a complete `>>` chain.
# start: an estimator, optionally preceded by cleaning steps, transformation
# steps, or cleaning followed by transformation.
g.start  = g.est | g.clean >> g.est | g.tfm >> g.est | g.clean >> g.tfm >> g.est
# clean / tfm: right-recursive rules, i.e. one or more clean1 / tfm1 steps in
# sequence. The recursion is bounded when the grammar is unfolded to a depth.
g.clean  = g.clean1 >> g.clean | g.clean1
g.tfm    = g.tfm1 >> g.tfm | g.tfm1

# Terminal choices: the concrete operators each single step may be.
g.clean1 = Imputer | MissingIndicator
g.tfm1   = PCA | OrdinalEncoder | OneHotEncoder(handle_unknown='ignore')
g.est    = GaussNB | RidgeClassifier  | LinearSVC | SGDClassifier


def grammar_trainer(data, args):
    """Optimize the pipelines generated by grammar ``g`` and score the result.

    The grammar is unfolded to ``args.max_depth``, the unfolded pipeline is
    handed to ``Hyperopt`` and fitted on the training split, and the fitted
    optimizer's predictions are scored on the test split.

    Args:
        data: Pair ``((X_train, y_train), (X_test, y_test))``.
        args: Object exposing the attributes ``max_depth``, ``cv``,
            ``max_eval``, ``max_opt_time`` and ``max_eval_time``.

    Returns:
        dict with two string values: ``'test_acc'`` is the accuracy on the
        test split, and ``'pipeline'`` is the pretty-printed pipeline
        returned by ``trained.get_pipeline()`` (printed without imports).
    """
    (X_train, y_train), (X_test, y_test) = data
    generated = g.unfold(args.max_depth)
    trainer = Hyperopt(
        estimator=generated,
        cv=args.cv,
        # Hyperopt's keyword is `max_evals`; the args attribute is `max_eval`.
        max_evals=args.max_eval,
        scoring='accuracy',
        max_opt_time=args.max_opt_time,
        max_eval_time=args.max_eval_time)
    trained = trainer.fit(X_train, y_train)
    y_hat = trained.predict(X_test)
    # Compute the test accuracy once and reuse it in the result.
    test_acc = str(accuracy_score(y_test, y_hat))
    return {
        'test_acc': test_acc,
        'pipeline': to_string(trained.get_pipeline(), show_imports=False)}
  
  
# Run the experiment: pass the experiment name and the trainer callback to the
# shared runner in the local `setup` module.
# NOTE(review): how `setup.run_experiments` builds the `data` and `args`
# passed to the callback is not visible from this file -- confirm in `setup`.
setup.run_experiments('grammar_alphad3m_1', grammar_trainer)
