#gene_expression (exp 1 & 2)
#phishing_dataset_numeric (exp 1)

import os
import subprocess

import numpy as np
import pandas as pd
from ucimlrepo import fetch_ucirepo

# File ID handed to the `gdown` command-line tool to fetch the gene
# expression archive.
GENE_G_ID = "17PwlvAAKeBYGLXPz9L2LVnNJ66XjuyZd"
# Dataset IDs passed to `fetch_ucirepo`.
PHISHING_UCI_ID = 967
Thyroid_Diff_UCI_ID = 915

# Every dataset is stored under this folder (relative to the current
# working directory).
dataset_folder = "datasets"
# exist_ok=True avoids the check-then-create race of testing for the folder
# first, and raises FileExistsError right away if "datasets" exists but is
# not a directory instead of failing later when a file is written into it.
os.makedirs(dataset_folder, exist_ok=True)

# Download the gene dataset (skipped when the file is already present).
gene_path = os.path.join(dataset_folder, "gene_expression.npz")
if not os.path.exists(gene_path):
    # Run gdown with an argument list rather than a shell string so that no
    # shell parsing is involved, and use check=True so that a failed
    # download raises CalledProcessError instead of being silently ignored.
    # FileNotFoundError is raised if the `gdown` executable is not installed.
    subprocess.run(["gdown", GENE_G_ID, "-O", gene_path], check=True)
    print(f"Gene expression dataset downloaded to {gene_path}")

# Fetch the phishing dataset and cache it as an .npz archive; nothing is
# done when the archive already exists.
phishing_path = os.path.join(dataset_folder, "phishing_dataset_numeric.npz")
if not os.path.exists(phishing_path):
    phishing_repo = fetch_ucirepo(id=PHISHING_UCI_ID)
    # Only the numeric feature columns are kept.
    numeric_features = phishing_repo.data.features.select_dtypes(
        include=[np.number]
    )
    # The archive stores the features under key "X" and the targets under "y".
    np.savez(
        phishing_path,
        X=numeric_features.to_numpy(),
        y=phishing_repo.data.targets.to_numpy(),
    )
    print(f"Phishing dataset downloaded to {phishing_path}")

# Fetch the Thyroid Diff dataset and cache it as a CSV file; nothing is done
# when the file already exists.
thyroid_diff_path = os.path.join(dataset_folder, "Thyroid_Diff.csv")
if not os.path.exists(thyroid_diff_path):
    thyroid_repo = fetch_ucirepo(id=Thyroid_Diff_UCI_ID)
    # Features and targets are placed side by side in one table, and the
    # row index is left out of the CSV.
    thyroid_table = pd.concat(
        [thyroid_repo.data.features, thyroid_repo.data.targets],
        axis="columns",
    )
    thyroid_table.to_csv(thyroid_diff_path, index=False)
    print(f"Thyroid Diff dataset downloaded to {thyroid_diff_path}")

