import numpy as np
import time

def _elm_activation(H, activation, with_grad=False):
    """Apply the hidden-layer activation selected by the integer code.

    Codes: 1 sigmoid, 2 sine, 3 tent ``max(0, 1 - |H|)``, 4 Gaussian
    ``exp(-H**2)``, 5 tanh, 6 ReLU.  Any other code leaves ``H`` unchanged
    (linear hidden layer).

    When ``with_grad`` is true, returns ``(activated, derivative)`` where the
    derivative is taken element-wise with respect to ``H``; otherwise returns
    only the activated array.
    """
    if activation == 1:
        act = 1 / (1 + np.exp(-H))
        grad = act * (1 - act) if with_grad else None
    elif activation == 2:
        act = np.sin(H)
        grad = np.cos(H) if with_grad else None
    elif activation == 3:
        act = np.maximum(0, 1 - np.abs(H))
        # d/dH of (1 - |H|) is -sign(H) inside the support and 0 outside.
        grad = np.where(np.abs(H) < 1, -np.sign(H), 0.0) if with_grad else None
    elif activation == 4:
        act = np.exp(-H**2)
        grad = -2 * H * act if with_grad else None
    elif activation == 5:
        act = np.tanh(H)
        grad = 1 - act**2 if with_grad else None
    elif activation == 6:
        act = np.maximum(0, H)
        grad = (H > 0).astype(float) if with_grad else None
    else:
        # Unknown code: identity map, so warm-up agrees with the final fit
        # and test passes instead of failing on an unbound variable.
        act = H
        grad = np.ones_like(H) if with_grad else None

    if with_grad:
        return act, grad
    return act


def _elm_ridge_solve(H, T, C):
    """Solve the regularised least-squares output weights for ``H @ beta ~ T``.

    Uses the (hidden x hidden) system when there are fewer hidden units than
    samples, and the (samples x samples) system otherwise, so the smaller
    matrix is the one being factorised.
    """
    n_samples, n_hidden = H.shape
    if n_hidden < n_samples:
        return np.linalg.solve(np.eye(n_hidden) / C + H.T @ H, H.T @ T)
    return H.T @ np.linalg.solve(np.eye(n_samples) / C + H @ H.T, T)


def _elm_adam_step(param, grad, m, v, step, lr, beta1, beta2, epsilon):
    """Apply one bias-corrected Adam update to ``param`` in place.

    Returns the updated first and second moment estimates ``(m, v)``.
    """
    m = beta1 * m + (1 - beta1) * grad
    v = beta2 * v + (1 - beta2) * (grad ** 2)
    m_hat = m / (1 - beta1 ** step)
    v_hat = v / (1 - beta2 ** step)
    param -= lr * m_hat / (np.sqrt(v_hat) + epsilon)
    return m, v


def FLAiR_ELM_function(trainX, trainY, testX, testY, option, No_of_class,
                       warmup_epochs_list=(2, 4, 6, 8, 10), lr=0.01,
                       beta1=0.9, beta2=0.999, epsilon=1e-8):
    """Train and evaluate an ELM whose hidden layer is warmed up with Adam.

    For every entry of ``warmup_epochs_list`` a fresh random hidden layer is
    drawn, refined for that many Adam epochs, and then the output weights are
    fitted in closed form (regularised least squares).  In each warm-up epoch
    the output weights are re-solved in closed form and treated as constants
    when the squared-error gradient is propagated to the hidden weights.

    Args:
        trainX: 2-D array-like of training features, one row per sample.
        trainY: Integer class labels for ``trainX``; 1-D or column-vector.
        testX: 2-D array-like of test features.
        testY: Integer class labels for ``testX``; 1-D or column-vector.
        option: Mapping with keys ``'N'`` (number of hidden units), ``'C'``
            (regularisation constant; ``I / C`` is added to the Gram matrix)
            and ``'activation'`` (integer code 1-6: sigmoid, sine, tent,
            Gaussian, tanh, ReLU; any other value gives a linear layer).
        No_of_class: Number of classes, i.e. width of the one-hot targets.
        warmup_epochs_list: Iterable of warm-up epoch counts to try.
        lr: Adam learning rate.
        beta1: Adam first-moment decay.
        beta2: Adam second-moment decay.
        epsilon: Adam denominator stabiliser.

    Returns:
        ``(best_result, all_results)``.  Each result is the tuple
        ``(warmup_epochs, train_acc, test_acc, train_time, test_time)`` with
        accuracies in percent and times in seconds.  ``best_result`` is the
        first result with the highest test accuracy, or ``None`` when
        ``warmup_epochs_list`` is empty.

    Note:
        The best configuration is selected on *test* accuracy, so the
        reported best test accuracy is an optimistic estimate.
    """

    def one_hot_encode(labels, num_classes):
        # Flatten first: an (n, 1) label column would otherwise broadcast
        # against the row index and mark every present class on every row.
        flat = np.asarray(labels).ravel().astype(int)
        one_hot = np.zeros((len(flat), num_classes))
        one_hot[np.arange(len(flat)), flat] = 1
        return one_hot

    trainX = np.asarray(trainX)
    testX = np.asarray(testX)

    trainY_one_hot = one_hot_encode(trainY, No_of_class)
    testY_one_hot = one_hot_encode(testY, No_of_class)
    train_true = np.argmax(trainY_one_hot, axis=1)
    test_true = np.argmax(testY_one_hot, axis=1)

    # Loop-invariant configuration.
    N = option['N']
    C = option['C']
    activation = option['activation']
    s = 1  # half-width of the uniform range used for random initialisation
    Nsample, Nfea = trainX.shape

    best_result = None
    best_test_acc = -np.inf
    all_results = []

    for warmup_epochs in warmup_epochs_list:
        start_time = time.time()

        # Fresh random hidden layer for every warm-up setting.
        W = np.random.uniform(-s, s, (Nfea, N)).astype(np.float32)
        b = np.random.uniform(0, s, N).astype(np.float32)

        m_W = np.zeros_like(W)
        v_W = np.zeros_like(W)
        m_b = np.zeros_like(b)
        v_b = np.zeros_like(b)

        for epoch in range(1, warmup_epochs + 1):
            H_act, dH = _elm_activation(np.dot(trainX, W) + b, activation,
                                        with_grad=True)
            beta = _elm_ridge_solve(H_act, trainY_one_hot, C)

            # Back-propagate the squared error through the hidden layer,
            # holding the freshly solved output weights fixed.
            error = H_act @ beta - trainY_one_hot
            grad_H = (error @ beta.T) * dH
            grad_W = trainX.T @ grad_H / Nsample
            grad_b = np.mean(grad_H, axis=0)

            m_W, v_W = _elm_adam_step(W, grad_W, m_W, v_W, epoch,
                                      lr, beta1, beta2, epsilon)
            m_b, v_b = _elm_adam_step(b, grad_b, m_b, v_b, epoch,
                                      lr, beta1, beta2, epsilon)

        # Final closed-form fit of the output layer on the warmed-up features.
        H = _elm_activation(np.dot(trainX, W) + b, activation)
        beta = _elm_ridge_solve(H, trainY_one_hot, C)
        trainY_pred = H @ beta
        train_time = time.time() - start_time

        # Softmax is monotone per row, so argmax on raw scores is equivalent.
        train_preds = np.argmax(trainY_pred, axis=1)
        train_acc = np.mean(train_preds == train_true) * 100

        # Test
        start_time = time.time()
        H = _elm_activation(np.dot(testX, W) + b, activation)
        testY_pred = H @ beta
        test_time = time.time() - start_time

        test_preds = np.argmax(testY_pred, axis=1)
        test_acc = np.mean(test_preds == test_true) * 100

        result = (warmup_epochs, train_acc, test_acc, train_time, test_time)
        if test_acc > best_test_acc:
            best_test_acc = test_acc
            best_result = result

        all_results.append(result)

    return best_result, all_results
