import numpy as np
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, roc_auc_score


def train_mlp_classifier(X_train, y_train, X_test, y_test, random_state=42, verbose=True):
    """Fit a preprocessing + MLP pipeline and score it on a held-out set.

    Columns of ``X_train`` with dtype ``object`` or ``category`` are treated
    as categorical (most-frequent imputation, then one-hot encoding); every
    other column is treated as numeric (median imputation, then
    standardisation). The transformed features feed a two-hidden-layer
    ``MLPClassifier`` trained with Adam and early stopping.

    Parameters
    ----------
    X_train, X_test
        Feature tables. ``X_train`` must provide ``select_dtypes`` (e.g. a
        pandas DataFrame); ``X_test`` is assumed to share its columns.
    y_train, y_test
        Binary targets. Labels may be any two values (``0/1``, ``-1/1``,
        ``"no"/"yes"`` ...); the larger label in sort order is treated as
        the positive class.
    random_state
        Seed forwarded to ``MLPClassifier``.
    verbose
        When true, print the detected column split and let the classifier
        log its training progress.

    Returns
    -------
    tuple
        ``(model, acc, auc)``: the fitted ``Pipeline``, the test accuracy at
        a 0.5 probability threshold, and the test ROC AUC.

    Raises
    ------
    ValueError
        If the fitted classifier did not see exactly two classes.
    """
    cat_cols = X_train.select_dtypes(include=["object", "category"]).columns.tolist()
    num_cols = X_train.select_dtypes(exclude=["object", "category"]).columns.tolist()

    if verbose:
        print("num:", len(num_cols), "cat:", len(cat_cols))
        print("categoricals:", cat_cols)

    numeric_tf = Pipeline(steps=[
        ("imputer", SimpleImputer(strategy="median")),
        ("scaler", StandardScaler()),
    ])

    categorical_tf = Pipeline(steps=[
        ("imputer", SimpleImputer(strategy="most_frequent")),
        # Categories unseen during fit encode as all-zero rows instead of raising.
        ("onehot", OneHotEncoder(handle_unknown="ignore", sparse_output=False)),
    ])

    preprocess = ColumnTransformer(
        transformers=[
            ("num", numeric_tf, num_cols),
            ("cat", categorical_tf, cat_cols),
        ],
        remainder="drop",
    )

    clf = MLPClassifier(
        hidden_layer_sizes=(100, 100),
        activation="relu",
        solver="adam",
        alpha=1e-4,
        batch_size=128,
        learning_rate_init=1e-3,
        max_iter=200,
        # A quarter of the training rows is held out internally to decide
        # when to stop; training ends after 20 epochs without improvement.
        early_stopping=True,
        validation_fraction=0.25,
        n_iter_no_change=20,
        random_state=random_state,
        verbose=verbose,
    )

    model = Pipeline(steps=[
        ("prep", preprocess),
        ("clf", clf),
    ])

    model.fit(X_train, y_train)

    # The pipeline fits ``clf`` in place, so its learned label set is
    # available here. Column 1 of predict_proba is only "the positive class"
    # when there are exactly two classes.
    classes = np.asarray(clf.classes_)
    if classes.shape[0] != 2:
        raise ValueError(
            "train_mlp_classifier supports binary targets only; "
            f"found {classes.shape[0]} classes: {classes.tolist()}"
        )

    proba = model.predict_proba(X_test)[:, 1]
    # Map the thresholded index back to the actual labels so accuracy is
    # correct for label sets other than {0, 1}.
    pred = classes[(proba >= 0.5).astype(int)]

    acc = accuracy_score(y_test, pred)
    auc = roc_auc_score(y_test, proba)

    return model, acc, auc









