import urllib.request
from io import BytesIO

# distributions
from zipfile import ZipFile

import numpy as np
import pandas as pd
import scipy.stats as stats

# California Housing is fetched through scikit-learn rather than from the UCI archive
from sklearn.datasets import fetch_california_housing


# ===== DATASET LOADING =====
def _load_bike_sharing():
    """Download the Bike Sharing archive and return hourly features and counts."""
    url = "https://archive.ics.uci.edu/ml/machine-learning-databases/00275/Bike-Sharing-Dataset.zip"
    with urllib.request.urlopen(url) as response:
        payload = response.read()
    # Both the archive and the member are context-managed so neither handle
    # is left open if CSV parsing raises.
    with ZipFile(BytesIO(payload)) as archive, archive.open("hour.csv") as hourly_csv:
        df = pd.read_csv(hourly_csv, parse_dates=["dteday"])
    # Derive calendar features from the date before the raw date is dropped.
    df["year"] = df["dteday"].dt.year
    df["month"] = df["dteday"].dt.month
    # NOTE(review): "casual" and "registered" are presumably dropped because
    # they are components of the target "cnt" -- confirm against the dataset.
    X = df.drop(columns=["dteday", "cnt", "casual", "registered"])
    y = df["cnt"]
    return X, y, "Bike Sharing"


def _load_california_housing():
    """Fetch California Housing via scikit-learn and wrap it in pandas objects."""
    data = fetch_california_housing()
    X = pd.DataFrame(data.data, columns=data.feature_names)
    y = pd.Series(data.target, name="MedHouseVal")
    return X, y, "California Housing"


def _load_abalone():
    """Download the Abalone data file and return features with ring counts."""
    url = "https://archive.ics.uci.edu/ml/machine-learning-databases/abalone/abalone.data"
    # The column names are supplied here and passed to read_csv via `names`.
    columns = [
        "sex",
        "length",
        "diameter",
        "height",
        "whole_weight",
        "shucked_weight",
        "viscera_weight",
        "shell_weight",
        "rings",
    ]
    df = pd.read_csv(url, names=columns)
    # Encode 'sex' as numeric: M=0, F=1, I=2. Any other value maps to NaN.
    df["sex"] = df["sex"].map({"M": 0, "F": 1, "I": 2})
    X = df.drop(columns=["rings"])
    y = df["rings"]
    return X, y, "Abalone"


def load_uci_dataset(name):
    """Load one of the supported regression datasets by identifier.

    Args:
        name: Dataset identifier. Supported values are "BIKE_SHARING",
            "CALIFORNIA_HOUSING" and "ABALONE" (compared with ``==``, so the
            match is case-sensitive).

    Returns:
        A tuple ``(X, y, label)`` where ``X`` is a DataFrame of features,
        ``y`` is a Series holding the target ("cnt", "MedHouseVal" or
        "rings" respectively) and ``label`` is a display name for the
        dataset.

    Raises:
        ValueError: If ``name`` is not one of the supported identifiers.

    Errors raised by the underlying download or parsing calls are not caught
    here and propagate to the caller.
    """
    if name == "BIKE_SHARING":
        return _load_bike_sharing()
    if name == "CALIFORNIA_HOUSING":
        return _load_california_housing()
    if name == "ABALONE":
        return _load_abalone()
    raise ValueError(f"Unknown dataset: {name}")
