function [TrainingTime,TestingTime,TrainingAccuracy,TestingAccuracy] = StaR_RVFL_ELM_train(trainX, trainY, testX, testY, option)
% StaR_RVFL_ELM_train  Train and evaluate a single-hidden-layer random network
% (ELM by default, RVFL via the toggle in star_design_matrix) whose random
% input weights are passed through regulate_weights_using_StaR.
%
% Inputs
%   trainX, testX : sample-by-feature matrices (one row per sample).
%   trainY, testY : label vectors; label k is written to one-hot column k+1,
%                   i.e. the code treats labels as 0-based integers.
%   option        : struct with the fields read below
%                     .N           number of hidden nodes
%                     .C           regularization parameter (ridge term is 1/C)
%                     .activation  integer code 1..6 selecting the activation
%                                  (see star_design_matrix)
%                     .nclass      number of one-hot columns
%                     .rho_min     forwarded to regulate_weights_using_StaR
%                     .rho_target  forwarded to regulate_weights_using_StaR
%
% Outputs
%   TrainingTime     : seconds from weight initialisation up to and including
%                      the prediction on the training set.
%   TestingTime      : seconds to build the test features and predict on them.
%   TrainingAccuracy : percentage of correctly classified training samples.
%   TestingAccuracy  : percentage of correctly classified test samples.
%
% NOTE(review): regulate_weights_using_StaR, sigmoid and relu are not defined
% in this file and must be available on the MATLAB path.

% Parameters
N = option.N;                   % Number of hidden nodes
C = option.C;                   % Regularization parameter
activation = option.activation; % Activation function code
nclass = option.nclass;         % Number of one-hot target columns
rho_min = option.rho_min;       % Passed to the StaR regulation
rho_target = option.rho_target; % Passed to the StaR regulation

% Size of the input data
[Nsample, Nfea] = size(trainX);

% One-hot encode the labels up front so that neither timer includes
% label bookkeeping.
dataY_train_temp = star_one_hot(trainY, nclass);
dataY_test_temp = star_one_hot(testY, nclass);

% -------------------
% Training phase
% -------------------
tic;

% Step 1: Initialize and regulate random weights using StaR.
% The order of the rand calls is kept (weights, regulation, biases) so the
% random stream is consumed exactly as before.
W_init = rand(Nfea, N) * 2 - 1;                                % uniform in [-1, 1]
W = regulate_weights_using_StaR(W_init, rho_min, rho_target);  % StaR regulation
b = rand(1, N);                                                % biases remain unregulated

% Steps 2-4: hidden layer, activation, bias column.
X = star_design_matrix(trainX, W, b, activation);

% Step 5: Compute output weights (beta) by ridge regression.
% Solve whichever system is smaller: features-by-features (primal) or
% samples-by-samples (dual).
nCols = size(X, 2);
if nCols < Nsample
    % Parentheses matter: "A \ X' * Y" is evaluated as "(A \ X') * Y",
    % which solves for Nsample right-hand sides instead of nclass.
    beta = (eye(nCols) * (1 / C) + X' * X) \ (X' * dataY_train_temp);
else
    beta = X' * ((eye(Nsample) * (1 / C) + X * X') \ dataY_train_temp);
end

PredictedTrainLabel = X * beta;
TrainingTime = toc;

% Step 6: Training accuracy
TrainingAccuracy = star_accuracy(PredictedTrainLabel, dataY_train_temp);

% -------------------
% Testing phase
% -------------------
tic;
Xt = star_design_matrix(testX, W, b, activation);
PredictedTestLabel = Xt * beta;
TestingTime = toc;

TestingAccuracy = star_accuracy(PredictedTestLabel, dataY_test_temp);

end

function Y = star_one_hot(labels, nclass)
% One-hot encode labels: row i gets a 1 in column labels(i) + 1.
nRows = size(labels, 1);
Y = zeros(nRows, nclass);
for i = 1:nRows
    Y(i, labels(i) + 1) = 1; % classes from 0
end
end

function H = star_design_matrix(dataX, W, b, activation)
% Build the matrix fed to the output layer: activated hidden features plus a
% trailing column of ones. Used for both training and test data so the two
% can never be built differently.
H = bsxfun(@plus, dataX * W, b); % add the bias row to every sample

% Any activation code other than 1..6 leaves H as the linear pre-activation.
switch activation
    case 1, H = sigmoid(H);
    case 2, H = sin(H);
    case 3, H = tribas(H);
    case 4, H = radbas(H);
    case 5, H = tansig(H);
    case 6, H = relu(H);
end

% ELM uses the hidden features only.
% H = [dataX, H];       % Uncomment for RVFL (direct input-output links)

H = [H, ones(size(dataX, 1), 1)]; % Add bias term
end

function acc = star_accuracy(scores, oneHotTargets)
% Percentage of rows whose highest-scoring class matches the one-hot target.
shifted = bsxfun(@minus, scores, max(scores, [], 2)); % subtract row max before exp
expShifted = exp(shifted);
prob_scores = bsxfun(@rdivide, expShifted, sum(expShifted, 2));
[~, predictedIdx] = max(prob_scores, [], 2);
[~, actualIdx] = max(oneHotTargets, [], 2);
acc = mean(predictedIdx == actualIdx) * 100;
end
