from src.data.datasets import SKLEARN_DATASETS, OPENML_DATASETS, LOCAL_DIR_DATASETS, UCI_DATASETS, KAGGLE_DATASETS

# Central experiment configuration: one flat mapping of setting name -> value,
# plus a nested per-dataset table under "dataset_configs".
CONFIG = {
    # --- Data settings ---
    "test_size": 0.2,
    "validation_size": 0.15,
    "data_split_seed": 42,
    "random_state": 1024,
    "evaluation_use_density_ranges": False,

    # --- Region definition method (KDE or bins) ---
    "region_definition_method": "kde",
    "n_bins_eval": 10,
    "bin_strategy_eval": "quantile",
    "kde_bandwidth_eval": None,

    # --- HPO settings ---
    "n_trials": 40,
    "hpo_timeout": 48 * 60 * 60,  # 48 hours (= 172800)
    "hpo_epochs": 300,
    "hpo_patience": 70,

    # --- Final training settings ---
    # NOTE(review): the patience equals the epoch budget here, so any
    # patience-based early stopping presumably never fires -- confirm intended.
    "final_epochs": 600,
    "final_patience": 600,

    # --- Loss settings ---
    "loss_bins": 10,

    # --- FDS settings ---
    "fds_num_target_bins": 50,
    "fds_momentum": 0.9,
    "fds_start_epoch": 0,
    "fds_discretizer_strategy": "uniform",

    # --- LDS settings ---
    "lds_ks": 5,
    "lds_sigma": 2.0,
    "lds_kernel": "gaussian",
    "lds_reweight_base": "sqrt_inv",
    "loss_for_reweighting": "l1",

    # --- B-MSE settings ---
    "bmse_noise_sigma": 6.0,
    "gmm_components": 3,

    # --- RankSim settings ---
    "ranksim_lambda_val": 1.0,
    "ranksim_alpha": 1.0,

    # --- ConR settings ---
    "conr_distance_threshold": 1.0,
    "conr_temperature": 0.07,
    "conr_pushing_power": 0.01,
    "conr_alpha": 1.0,
    "conr_mse_weight": 1.0,

    # --- Algorithms to run ---
    "algorithms_to_run": [
        "XGBoost",
        "LightGBM",
        "CatBoost",
        "MLP",
        "SMOTER_XGBoost",
        "GaussianNoise_XGBoost",
        "CASMIR",
        "MLP_SQRT_INV",
        "MLP_LDS_Notebook",
        "MLP_FDS_Notebook",
        "MLP_LDS_FDS_Notebook",
        "MLP_GAI_BMSE",
        "MLP_BMC_BMSE",
        "MLP_RankSim",
        "MLP_ConR",
        "Simple_Ensemble",
    ],

    # --- Datasets to run ---
    # Every key of every dataset registry, concatenated in this fixed order:
    # sklearn, OpenML, local directory, UCI, Kaggle.
    "datasets_to_run": [
        *SKLEARN_DATASETS.keys(),
        *OPENML_DATASETS.keys(),
        *LOCAL_DIR_DATASETS.keys(),
        *UCI_DATASETS.keys(),
        *KAGGLE_DATASETS.keys(),
    ],

    # --- CASMIR hyperparameters ---
    "casmir_k_neighbors": 10,
    "casmir_kde_bandwidth": "silverman",
    "casmir_feature_bw": 1.5,
    "casmir_label_bw": 10.0,
    "casmir_density_factor": 0.1,
    "casmir_strength_base": 0.6,
    "casmir_density_c": 20.0,
    "casmir_lambda_aux": 0.5,
    "casmir_lambda_load": 0.05,
    "casmir_num_experts": 3,
    "casmir_expert_hidden_dims": [32, 16],
    "casmir_gate_hidden_dims": [32],
    "casmir_density_percentiles": [33.3, 66.7],

    # --- Additional settings ---
    "gai_init_noise_sigma": 6.0,
    "max_target_for_balancing": 51,

    # --- Dataset-specific configs (few/many thresholds and bin counts) ---
    # Each row below is expanded into
    #   name -> {"few_threshold", "many_threshold", "y_bins", "checked"}.
    # NOTE(review): "checked" presumably marks rows whose values were manually
    # verified -- confirm. Names that look misspelled (e.g. "airfoild",
    # "vidio_transcoding_regression") are kept verbatim because they
    # presumably have to match the keys of the dataset registries.
    "dataset_configs": {
        dataset_name: {
            "few_threshold": few,
            "many_threshold": many,
            "y_bins": bins,
            "checked": verified,
        }
        for dataset_name, few, many, bins, verified in (
            # (name, few_threshold, many_threshold, y_bins, checked)
            ("california_housing", 50, 100, 46, True),
            ("Abalone", 50, 90, 76, True),
            ("acceleration", 5, 13, 40, True),
            ("airfoild", 5, 12, 22, True),
            ("availPwr", 5, 12, 48, True),
            ("bank8FM", 18, 30, 31, True),
            ("concreteStrength", 5, 10, 19, True),
            ("cpuSm", 20, 50, 77, True),
            ("dAiler", 20, 90, 104, False),
            ("fuelCons", 4, 10, 43, True),
            ("machineCpu", 2, 10, 10, True),
            ("maxTorque", 5, 16, 42, True),
            ("servo", 2, 10, 13, True),
            ("a1", 3, 10, 7, False),
            ("a2", 2, 10, 10, False),
            ("a3", 2, 10, 7, False),
            ("a4", 3, 13, 10, True),
            ("a5", 3, 8, 10, False),
            ("a6", 2, 10, 10, False),
            ("diabetes", 4, 10, 10, True),
            ("grid_stability_regression", 25, 57, 34, False),
            ("vidio_transcoding_regression", 30, 200, 552, True),
            ("sarcos_regression", 50, 130, 160, True),
            ("diamond_regression", 50, 200, 80, False),
            ("miami_housing_regression", 18, 60, 161, False),
            ("health_insurance", 150, 500, 10, False),
            ("fifa", 40, 200, 669, False),
            ("solar_flare", 12, 25, 7, False),
            ("space_ga", 5, 11, 90, False),
            ("ecoli70", 5, 15, 33, False),
            ("magic_irri", 8, 18, 34, False),
            ("nhanes_age", 9, 13, 30, False),
            ("pumadyn32nh", 22, 67, 35, True),
            ("geographical_origin_of_music", 20, 50, 6, True),
            ("kin8nm", 15, 40, 38, True),
            ("Moneyball", 5, 10, 24, True),
            ("red_wine", 20, 50, 5, True),
            ("socmob", 3, 22, 156, True),
            ("white_wine", 50, 200, 7, True),
            ("forest_fires", 3, 30, 100, False),
            ("superconductivity", 55, 100, 45, False),
            ("communities_crime", 10, 30, 25, False),
            ("qsar_aquatic_toxicity", 3, 6, 21, False),
            ("energy_efficiency", 10, 20, 10, False),
            ("bike_sharing", 7, 12, 10, False),
            ("combined_cycle_power_plant", 50, 65, 28, False),
            ("parkinsons_telemonitoring", 20, 40, 25, False),
            ("brazilian_houses", 70, 180, 30, False),
            ("california_housing_openml", 50, 120, 35, False),
            ("cps88wages", 150, 530, 40, False),
            ("fps_benchmark", 100, 250, 25, False),
            ("kings_county", 200, 400, 35, False),
            ("physiochemical_protein", 100, 200, 50, False),
            ("online_news_popularity", 60, 500, 300, False),
            ("sulfur", 30, 180, 25, False),
            ("bank32nh", 20, 60, 22, False),
        )
    },
}
