import pandas as pd
from sklearn.model_selection import train_test_split
from autogluon.tabular import TabularPredictor
from autogluon.features.generators import (
    AutoMLPipelineFeatureGenerator,
    AbstractFeatureGenerator,
)
from sklearn.metrics import roc_auc_score


class NoOpFeatureGenerator(AbstractFeatureGenerator):
    """Feature generator that hands every frame back exactly as it received it."""

    def _fit_transform(
        self, X: pd.DataFrame, y: pd.Series = None, **kwargs
    ) -> (pd.DataFrame, dict):
        """Fit-time hook: return ``X`` untouched together with an empty type-group dict."""
        # Nothing is learned and nothing is derived, so there are no groups to report.
        type_family_groups = dict()
        return X, type_family_groups

    def _transform(self, X: pd.DataFrame, **kwargs) -> pd.DataFrame:
        """Inference-time hook: return ``X`` untouched."""
        return X

    def get_default_infer_features_in_args(self):
        """Report that no feature-inference arguments are needed (empty dict)."""
        return dict()

    def get_default_ag_args(self):
        """Report that no generator-level AutoGluon arguments are set (empty dict)."""
        return dict()

    def get_default_ag_args_fit(self):
        """Report that no fit-level AutoGluon arguments are set (empty dict)."""
        return dict()


class AutoXGB:
    """Train and evaluate an XGBoost-only AutoGluon ``TabularPredictor``.

    ``target`` is the name of the label column. ``model`` holds the fitted
    predictor and stays ``None`` until :meth:`train_model` has been called.
    """

    def __init__(self, target):
        self.target = target
        self.model = None

    def _create_feature_generator(self):
        """Return the pass-through feature generator handed to ``fit``."""
        return NoOpFeatureGenerator()

    def _require_model(self):
        """Return the fitted model, or raise ``ValueError`` if none exists yet."""
        # Explicit ``is None`` so the check never depends on the predictor's truthiness.
        if self.model is None:
            raise ValueError("Model has not been trained yet.")
        return self.model

    def _features_only(self, data: pd.DataFrame) -> pd.DataFrame:
        """Return ``data`` without the target column; a frame lacking it is accepted."""
        return data.drop(columns=[self.target], errors="ignore")

    def load_data(self, data: pd.DataFrame, test_size=0.2, random_state=42):
        """Prepare a DataFrame and split it into train and test partitions.

        Object-dtype columns are converted to ``category`` dtype **in place** on
        ``data`` before splitting.

        Args:
            data: Full dataset, including the target column.
            test_size: Forwarded to ``train_test_split`` (default 0.2).
            random_state: Forwarded to ``train_test_split`` (default 42).

        Returns:
            ``(train_data, test_data)`` as produced by ``train_test_split``.

        Raises:
            ValueError: If the target column is not present in ``data``.
        """
        # Validate before touching anything so a failed call leaves ``data`` unmodified.
        if self.target not in data.columns:
            raise ValueError(f"Target column '{self.target}' not found in the dataset.")
        for col in data.select_dtypes(include="object").columns:
            data[col] = data[col].astype("category")
        train_data, test_data = train_test_split(
            data, test_size=test_size, random_state=random_state
        )
        return train_data, test_data

    def train_model(self, train_data, time_limit=3600):
        """Fit a ``TabularPredictor`` restricted to XGBoost and store it on ``self.model``.

        Args:
            train_data: Training frame that includes the target column.
            time_limit: Value forwarded to ``fit`` as ``time_limit`` (default 3600).
        """
        self.model = TabularPredictor(
            label=self.target,
            eval_metric="log_loss",
            verbosity=0,  # Set verbosity to 0 to minimize logs
        ).fit(
            train_data=train_data,
            hyperparameters={
                "XGB": {  # The only model family listed, so only XGBoost is trained
                    "ag_args": {"valid_stacker": False},
                    "use_label_encoder": False,
                    "tree_method": "hist",
                    "enable_categorical": True,
                }
            },
            feature_generator=self._create_feature_generator(),
            presets="best_quality",  # Set to 'medium_quality_faster_train' for faster performance
            time_limit=time_limit,
            num_stack_levels=0,  # Ensure no stacking
            num_bag_folds=0,  # Ensure no bagging
            num_bag_sets=0,
            ag_args_fit={"max_memory_usage_ratio": 3},
        )

    def predict(self, test_data):
        """Return the model's predictions for ``test_data``.

        The target column is dropped when present, so labeled and unlabeled
        frames are both accepted (matching :meth:`predict_proba`).

        Raises:
            ValueError: If the model has not been trained yet.
        """
        model = self._require_model()
        return model.predict(self._features_only(test_data))

    def evaluate_model(self, test_data):
        """Return the fraction of rows in ``test_data`` whose prediction equals the label."""
        y_test = test_data[self.target]
        y_pred = self.predict(test_data)
        return (y_pred == y_test).mean()

    def predict_proba(self, test_data):
        """Return the model's class-probability predictions for ``test_data``.

        The target column is dropped when present.

        Raises:
            ValueError: If the model has not been trained yet.
        """
        model = self._require_model()
        return model.predict_proba(self._features_only(test_data))

    def calculate_importance(self, data):
        """Return ``self.model.feature_importance(data)``.

        Raises:
            ValueError: If the model has not been trained yet.
        """
        return self._require_model().feature_importance(data)

    def calculate_aucroc(self, test_data, positive_class):
        """Return the ROC AUC of ``positive_class`` against all other labels.

        Args:
            test_data: Frame that includes the target column.
            positive_class: Label whose predicted-probability column is scored.

        Raises:
            ValueError: If the model has not been trained yet.
        """
        self._require_model()
        # Binarise explicitly: given raw binary labels, roc_auc_score treats the
        # greater label as positive, which inverts the score whenever
        # ``positive_class`` happens to be the smaller one.
        is_positive = test_data[self.target] == positive_class
        y_prob = self.predict_proba(test_data)[positive_class]
        return roc_auc_score(is_positive, y_prob)
