"""
Default model configurations for hyperparameter optimization.

This module contains pre-configured model suites with their parameter spaces
for use in hyperparameter tuning experiments.
"""

import lightgbm as lgb
import xgboost as xgb
from interpret.glassbox import ExplainableBoostingRegressor
from mpf_py import MPFRegressor
from sklearn.ensemble import RandomForestRegressor

# Shared seed: passed to every estimator constructed in this module so that
# repeated runs start from the same random state.
random_state: int = 42

# Registry of model suites for hyperparameter tuning.
#
# Each entry maps a model name to a 2-tuple: (estimator instance, search space).
# A search space maps a parameter name to either
#   - a tuple ("randint" | "uniform" | "loguniform", a, b), or
#   - a list of explicit candidate values (a single-element list pins the value).
# The tuples are interpreted by the tuning code, which is not part of this module.
# The inline notes on the ExplainableBoostingRegressor entry treat the "randint"
# upper bound as exclusive; "uniform"/"loguniform" are presumably (low, high)
# bounds -- TODO confirm against the sampler.
#
# Parameter ranges are designed to be practical and avoid extreme values that
# could break models. Key principles:
# - subsample/colsample parameters: avoid 0.0 (would break models), typically 0.3-1.0
# - learning_rate: avoid 0.0 and very high values (>0.5), typically 0.001-0.5
# - regularization: log-uniform for wide range, uniform for narrow range
# - tree parameters: reasonable bounds based on typical usage patterns
# NOTE(review): some ranges below deviate from these guidelines (learning_rate
# goes up to 0.999 for XGBoost and LightGBM; several sampling fractions start at
# 0.1) -- confirm these are intentional.
default_models = {
    "XGBRegressor": (
        xgb.XGBRegressor(
            objective="reg:squarederror",
            verbosity=0,
            random_state=random_state,
            n_jobs=5,
        ),
        {
            "n_estimators": ("randint", 50, 1501),  # Reasonable ensemble size range
            "learning_rate": (
                "uniform",
                0.001,
                0.999,
            ),  # Wide range, though 0.999 is quite high
            "max_depth": ("randint", 1, 20),  # Typical tree depth range
            "subsample": (
                "uniform",
                0.5,
                1.0,
            ),  # Avoid 0.0 (would break), 0.5+ is practical
            "colsample_bylevel": (
                "uniform",
                0.1,
                0.9,
            ),  # Conservative range for level sampling
            "colsample_bytree": ("uniform", 0.3, 1.0),  # Avoid 0.0, 0.3+ is practical
            "gamma": (
                "loguniform",
                0.001,
                1000,
            ),  # Log-uniform for regularization strength
            "reg_alpha": (
                "loguniform",
                0.001,
                1000,
            ),  # Log-uniform for L1 regularization
            "reg_lambda": ("uniform", 0, 1),  # L2 regularization, uniform is fine
        },
    ),
    "LGBMRegressor": (
        lgb.LGBMRegressor(
            random_state=random_state,
            verbose=-1,
            n_jobs=5,
            # LightGBM only applies row subsampling (bagging) when the bagging
            # frequency is positive. With the default of 0 the "subsample"
            # dimension searched below would have no effect on the fitted model.
            subsample_freq=1,
        ),
        {
            "n_estimators": ("randint", 50, 501),
            "learning_rate": ("uniform", 0.001, 0.999),
            "num_leaves": ("randint", 20, 61),
            "max_depth": ("randint", 1, 20),
            "min_child_samples": ("randint", 5, 31),
            "subsample": ("uniform", 0.1, 0.6),  # Active because subsample_freq=1 above
            "colsample_bytree": ("uniform", 0.1, 0.6),
            "reg_alpha": ("uniform", 0, 1),
            "reg_lambda": ("uniform", 0, 1),
        },
    ),
    "MPFRegressor": (
        MPFRegressor(seed=random_state, verbosity=0),
        {
            # Parameters aligned with sklearn.py MPFRegressor
            "epochs": ("randint", 1, 5),
            "n_trees": [100],  # Fixed to avoid parameter conflicts
            "n_iter": ("randint", 10, 100),
            "decay": ("uniform", 0.8, 1.0),
            "split_try": ("randint", 2, 20),
            "colsample_bytree": ("uniform", 0.3, 1.0),
            "alpha": ("loguniform", 1e-6, 1),
            "complexity_penalty": [0.0],  # Fixed to default
            "min_split_loss": [0.0],  # Fixed to default
            "min_interval_samples": ("randint", 1, 20),
            "refinement_strategy": ["l2", "huber"],  # Two candidate strategies
            "prior_sample_size": [0.0],  # Fixed to default
            "update_clamp": ("uniform", 0.5, 5.0),
            "tilt_tau": [0.0],  # Fixed to 0
            "tilt_rho": [0.0],  # Fixed to default
            "split_strategy": ["random"],  # Single candidate, i.e. fixed
            "top_k": ("randint", 5, 20),
            "must_fill_all_k": [True],  # Single candidate, i.e. fixed
            "similarity_threshold": [0.0],  # Fixed to default
            "bagged": [True],  # Single candidate, i.e. fixed
        },
    ),
    "RandomForestRegressor": (
        RandomForestRegressor(random_state=random_state, n_jobs=-1),
        {
            "n_estimators": ("randint", 50, 1501),
            "max_depth": ("randint", 5, 21),
            "min_samples_split": ("randint", 2, 11),
            "min_samples_leaf": ("randint", 1, 11),
            "max_features": ("uniform", 0.1, 0.6),
        },
    ),
    "ExplainableBoostingRegressor": (
        ExplainableBoostingRegressor(random_state=random_state, n_jobs=-2),
        {
            "learning_rate": ("loguniform", 0.005, 0.05),  # Log-uniform distribution
            "max_bins": ("randint", 64, 257),  # 64 to 256 (inclusive)
            "min_samples_leaf": ("randint", 10, 201),  # Integer parameter: 10 to 200 (inclusive)
            "max_rounds": ("randint", 10, 501),  # 10 to 500 (inclusive)
            "outer_bags": ("randint", 4, 21),  # 4 to 20 (inclusive)
            "smoothing_rounds": ("randint", 0, 501),  # 0 to 500 (inclusive)
            "interactions": ("randint", 0, 21),  # 0 to 20 (inclusive)
            "max_interaction_bins": ("randint", 16, 65),  # 16 to 64 (inclusive)
        },
    ),
}
