# select.py
import numpy as np
import random
from sklearn.linear_model import LogisticRegression

# Hard Thresholding Gradient Descent with SVRG
class HardThresholdingSVRG:
    """Least-squares feature selector using SVRG-style steps and hard thresholding.

    Each outer iteration computes the full squared-error gradient at a
    snapshot of the coefficients, performs mini-batch variance-reduced
    updates, and then zeroes the small-magnitude coefficients.

    Parameters
    ----------
    top_k :
        Percentile width used by ``hard_threshold``: coefficients whose
        magnitude is below the ``(100 - top_k)``-th percentile are zeroed.
        NOTE(review): despite the name this is a percentage, not a feature
        count -- confirm that this is what callers expect.
    learning_rate :
        Step size applied to every inner update.
    n_iter :
        Number of outer iterations (snapshot + inner loop + thresholding).
    batch_size :
        Requested mini-batch size; ``fit`` caps it at the number of samples.

    Attributes
    ----------
    coef_ :
        Learned coefficient vector, ``None`` until ``fit`` has been called.
    """

    def __init__(self, top_k, learning_rate=0.01, n_iter=100, batch_size=64):
        self.top_k = top_k
        self.learning_rate = learning_rate
        self.n_iter = n_iter
        self.batch_size = batch_size
        self.coef_ = None

    @staticmethod
    def _gradient(X, y, coef):
        """Return the mean squared-error gradient of ``X @ coef`` against ``y``."""
        return -2 * X.T @ (y - X @ coef) / X.shape[0]

    def fit(self, X, y, theta):
        """Learn sparse coefficients for the least-squares problem ``X @ w ~ y``.

        Parameters
        ----------
        X :
            2-D array; rows are indexed with a list of sampled row numbers.
        y :
            1-D target array with one entry per row of ``X``.
        theta :
            Divisor of the correction term
            ``batch_gradient(current) - batch_gradient(snapshot)``;
            ``theta=1`` yields the standard SVRG gradient estimator.

        The result is stored in ``self.coef_``; nothing is returned.
        """
        n_samples, n_features = X.shape
        self.coef_ = np.zeros(n_features)
        if n_samples == 0:
            # Nothing to learn from; keep the all-zero coefficients.
            return

        # Cap the batch at the data size.  Without this, n_samples < batch_size
        # gives zero inner iterations and the coefficients never leave zero.
        batch_size = min(self.batch_size, n_samples)
        n_inner = n_samples // batch_size

        for _ in range(self.n_iter):
            # Full gradient and snapshot taken at the same point.
            coef_snapshot = self.coef_.copy()
            full_gradient = self._gradient(X, y, coef_snapshot)

            for _ in range(n_inner):
                batch_indices = random.sample(range(n_samples), batch_size)
                X_batch = X[batch_indices]
                y_batch = y[batch_indices]

                # Only the current-point gradient is sanitised (NaN/inf
                # replaced by finite numbers), as in the original code.
                gradient = np.nan_to_num(
                    self._gradient(X_batch, y_batch, self.coef_)
                )
                snapshot_gradient = self._gradient(
                    X_batch, y_batch, coef_snapshot
                )
                svrg_gradient = (
                    (1 / theta) * (gradient - snapshot_gradient) + full_gradient
                )
                self.coef_ -= self.learning_rate * svrg_gradient

            # Project back onto the sparse set once per outer iteration.
            self.hard_threshold()

    def hard_threshold(self):
        """Zero every coefficient below the ``(100 - top_k)``-th magnitude percentile."""
        magnitudes = np.abs(self.coef_)
        threshold = np.percentile(magnitudes, (100 - self.top_k))
        self.coef_ = np.where(magnitudes >= threshold, self.coef_, 0)

    def transform(self, X):
        """Return the columns of ``X`` whose learned coefficient is non-zero.

        Raises
        ------
        RuntimeError
            If called before ``fit`` (``coef_`` is still ``None``).
        """
        if self.coef_ is None:
            raise RuntimeError(
                "HardThresholdingSVRG is not fitted yet; call fit() first."
            )
        mask = self.coef_ != 0
        return X[:, mask]

# Hard Thresholding Gradient Descent with SARAH
class HardThresholdingSARAH:
    """Least-squares feature selector using SARAH steps and hard thresholding.

    Each outer iteration starts from the full squared-error gradient, refines
    it recursively with mini-batch gradient differences between consecutive
    iterates, and then zeroes the small-magnitude coefficients.

    Parameters
    ----------
    top_k :
        Percentile width used by ``hard_threshold``: coefficients whose
        magnitude is below the ``(100 - top_k)``-th percentile are zeroed.
        NOTE(review): despite the name this is a percentage, not a feature
        count -- confirm that this is what callers expect.
    learning_rate :
        Step size applied to every inner update.
    n_iter :
        Number of outer iterations (full gradient + inner loop + thresholding).
    batch_size :
        Requested mini-batch size; ``fit`` caps it at the number of samples.

    Attributes
    ----------
    coef_ :
        Learned coefficient vector, ``None`` until ``fit`` has been called.
    """

    def __init__(self, top_k, learning_rate=0.01, n_iter=100, batch_size=64):
        self.top_k = top_k
        self.learning_rate = learning_rate
        self.n_iter = n_iter
        self.batch_size = batch_size
        self.coef_ = None

    @staticmethod
    def _gradient(X, y, coef):
        """Return the mean squared-error gradient of ``X @ coef`` against ``y``."""
        return -2 * X.T @ (y - X @ coef) / X.shape[0]

    def fit(self, X, y):
        """Learn sparse coefficients for the least-squares problem ``X @ w ~ y``.

        Parameters
        ----------
        X :
            2-D array; rows are indexed with a list of sampled row numbers.
        y :
            1-D target array with one entry per row of ``X``.

        The result is stored in ``self.coef_``; nothing is returned.
        """
        n_samples, n_features = X.shape
        self.coef_ = np.zeros(n_features)
        if n_samples == 0:
            # Nothing to learn from; keep the all-zero coefficients.
            return

        # Cap the batch at the data size.  Without this, n_samples < batch_size
        # gives zero inner iterations and the coefficients never leave zero.
        batch_size = min(self.batch_size, n_samples)
        n_inner = n_samples // batch_size

        for _ in range(self.n_iter):
            # v_0: full gradient at the start of the outer iteration.
            v = self._gradient(X, y, self.coef_)
            # Previous iterate for the recursion.  It starts equal to the
            # current point, so the first inner step is a plain step along v_0.
            coef_prev = self.coef_.copy()

            for _ in range(n_inner):
                batch_indices = random.sample(range(n_samples), batch_size)
                X_batch = X[batch_indices]
                y_batch = y[batch_indices]

                # Only the current-point gradient is sanitised (NaN/inf
                # replaced by finite numbers), as in the original code.
                gradient = np.nan_to_num(
                    self._gradient(X_batch, y_batch, self.coef_)
                )
                prev_gradient = self._gradient(X_batch, y_batch, coef_prev)

                # SARAH recursion: the difference is taken against the
                # PREVIOUS iterate, not the outer snapshot.  Using the
                # snapshot here makes the accumulated estimate drift.
                v = gradient - prev_gradient + v

                # Copy before the in-place update below mutates coef_.
                coef_prev = self.coef_.copy()
                self.coef_ -= self.learning_rate * v

            # Project back onto the sparse set once per outer iteration.
            self.hard_threshold()

    def hard_threshold(self):
        """Zero every coefficient below the ``(100 - top_k)``-th magnitude percentile."""
        magnitudes = np.abs(self.coef_)
        threshold = np.percentile(magnitudes, (100 - self.top_k))
        self.coef_ = np.where(magnitudes >= threshold, self.coef_, 0)

    def transform(self, X):
        """Return the columns of ``X`` whose learned coefficient is non-zero.

        Raises
        ------
        RuntimeError
            If called before ``fit`` (``coef_`` is still ``None``).
        """
        if self.coef_ is None:
            raise RuntimeError(
                "HardThresholdingSARAH is not fitted yet; call fit() first."
            )
        mask = self.coef_ != 0
        return X[:, mask]
# Feature selection function that takes the algorithm name and applies the corresponding method
def feature_selection(algorithm, X_train, y_train, X_test, k=30):
    """Fit the named selector on the training data and reduce both matrices.

    Parameters
    ----------
    algorithm :
        One of ``'BVRSZHTn'``, ``'BVRSZHT12'``, ``'VRSZHT'`` (all
        ``HardThresholdingSVRG``, differing only in the ``theta`` passed to
        ``fit``), ``'SARAH'`` (``HardThresholdingSARAH``) or ``'SAGA'``
        (L1-penalised ``LogisticRegression`` with the saga solver).
    X_train, y_train :
        Training features and targets used to fit the selector.
    X_test :
        Test features; reduced to the same columns as ``X_train``.
    k :
        Forwarded as ``top_k`` to the hard-thresholding selectors; not used
        by the ``'SAGA'`` branch.

    Returns
    -------
    tuple
        ``(X_train_selected, X_test_selected)``.

    Raises
    ------
    ValueError
        If ``algorithm`` is not one of the names above.
    """
    # theta for each SVRG variant; None means "number of training samples"
    # and is resolved lazily so unknown names still raise ValueError first.
    svrg_theta = {'BVRSZHTn': None, 'BVRSZHT12': 2, 'VRSZHT': 1}
    hyper_params = dict(top_k=k, learning_rate=0.001, n_iter=100, batch_size=64)

    if algorithm in svrg_theta:
        theta = svrg_theta[algorithm]
        if theta is None:
            theta = X_train.shape[0]
        selector = HardThresholdingSVRG(**hyper_params)
        selector.fit(X_train, y_train, theta)
    elif algorithm == 'SARAH':
        selector = HardThresholdingSARAH(**hyper_params)
        selector.fit(X_train, y_train)
    elif algorithm == 'SAGA':
        saga = LogisticRegression(penalty='l1', solver='saga', C=0.8, max_iter=200, tol=1e-3, random_state=42)
        saga.fit(X_train, y_train)

        # coef_ has one row per class for multiclass targets.  Keep a feature
        # when ANY class gives it a non-zero weight; taking only the first
        # n_features of the flattened array would look at one class only.
        mask_saga = np.any(saga.coef_ != 0, axis=0)
        return X_train[:, mask_saga], X_test[:, mask_saga]
    else:
        raise ValueError(f"Unknown algorithm: {algorithm}")

    X_train_selected = selector.transform(X_train)
    X_test_selected = selector.transform(X_test)
    return X_train_selected, X_test_selected
