% ---------------------------------------------------------------------
% IPP-SSG set-up: iteration budgets, history buffers, CPU-time record and
% the objective / constraint values at the starting point w_init.
% Reads from the workspace (defined outside this section): d, w_init, A, b,
% n, SCAD_coefficient, SCAD, Ac, idmalec, idfemalec, kappa.
% ---------------------------------------------------------------------

% upper bound on the number of (outer) iterations of the algorithm
numiterippssg_ub = 600;
% upper bound on the number of sub-iterations run inside each outer iteration
numitersub_ub = 600;
% running count of subgradient steps actually performed (over all outer iterations)
step_count_ippssg = 0;

% d*(numiterippssg_ub+1) zero matrix; column t+1 stores the outer iterate w^(t)
IPP_SSG_w_candidate = zeros(d, numiterippssg_ub+1);
% 1*(numiterippssg_ub+1) zero array; entry t+1 stores the objective value at w^(t)
IPP_SSG_obj_candidate = zeros(1, numiterippssg_ub+1);
% 1*(numiterippssg_ub+1) zero array; entry t+1 stores the constraint value at w^(t)
IPP_SSG_cons_candidate = zeros(1, numiterippssg_ub+1);

% cumulative CPU-time record of the IPP-SSG method, starting at 0
IPP_SSG_time_set = [0];

% w_init is inherited from the initialization (stated to be a d*1 double);
% it becomes both the working iterate w and the stored w^(0)
w = w_init;
IPP_SSG_w_candidate(:,1) = w_init;

% compute the value of the objective F(w^(0)):
% sum of the positive parts of 1 - b.*(A*w), divided by n, plus SCAD_coefficient * SCAD(w)
IPP_SSG_obj_candidate(1,1) = dot( 1 - b.*(A*w), 1 - b.*(A*w) > 0 ) / n + SCAD_coefficient * SCAD(w); 

% compute mean_male = mean( sigmoid ( Ac_male * w ) ), using the rows of Ac selected by idmalec
sigmoid_mean_male = mean( 1 ./ ( 1 + exp( - Ac(idmalec,:)*w ) ) );
% compute mean_female = mean( sigmoid ( Ac_female * w ) ), using the rows of Ac selected by idfemalec
sigmoid_mean_female = mean( 1 ./ ( 1 + exp( - Ac(idfemalec,:)*w ) ) );
% get the value of the constraint G(w^(0)) = |mean_male - mean_female| - kappa
IPP_SSG_cons_candidate(1,1) = abs( sigmoid_mean_male - sigmoid_mean_female ) - kappa;

fprintf('Iter=%i, obj=%f, cons=%f\n', 0, IPP_SSG_obj_candidate(1,1)+0, IPP_SSG_cons_candidate(1,1)+0);
% the "+0" avoids the error "using fprintf: Function is not defined for sparse inputs"

% constants of the method as given by a closed-form recipe (currently disabled;
% the threshold and step size used by the loop are hard-coded there instead)
% D = sqrt( 8 * C / (rho_hat - rho) );
% sigma = sqrt( min(eigs( ((A').*b') * (b.*A), d )) );
% B = ( M + rho_hat*D ) / sigma;
% epsilon = 0.001;
% delta = (rho_hat - rho) * epsilon^2 / (8 * (1+B)^2 * rho_hat^2);
% tau = min( 1 / (rho_hat - rho + rho_hat*B), 1 ) * (rho_hat-rho) * epsilon^2 / (8 * (1+B)^2 * rho_hat);

% ---------------------------------------------------------------------
% IPP-SSG main loop.
% Each outer iteration t starts from w^(t,0) = w and runs numitersub_ub
% subgradient steps on the proximally regularised functions
%   F_t(w) = F(w) + rho_hat/2 * ||w - w^(t,0)||^2   (objective)
%   G_t(w) = G(w) + rho_hat/2 * ||w - w^(t,0)||^2   (constraint)
% stepping along the objective subgradient when the current constraint value
% is below ippssg_tau and along the constraint subgradient otherwise.
% The next outer iterate is a weighted average of the sub-iterates at which
% the objective step was taken.
% Reads from the workspace: numiterippssg_ub, numitersub_ub, d, n, A, b, w,
% rho_hat, kappa, SCAD_coefficient, SCAD, SCAD_subgradient,
% constraint_subgradient, Ac, idmalec, idfemalec, nummalec, numfemalec,
% step_count_ippssg, IPP_SSG_time_set, IPP_SSG_*_candidate.
% ---------------------------------------------------------------------

% the feasibility threshold and the step size are the same in every
% (sub-)iteration, so they are set once here
ippssg_tau = 1e-6; % select from {1e-6, 2e-6, 5e-6, 1e-5}
% eta = 2 / ( (rho_hat-rho) * (k+1) + (6*rho_hat)^2 / ( (rho_hat-rho)*k ) );
ippssg_eta_k = 2e-4; % select from {1e-4, 2e-4, 5e-4, 7.5e-4}

for t = 1:numiterippssg_ub

    % 1*(numitersub_ub+1) zero array; entry k+1 stores F_t(w^(t,k))
    obj_candidate = zeros(1, numitersub_ub+1);
    % 1*(numitersub_ub+1) zero array; entry k+1 stores G_t(w^(t,k))
    cons_candidate = zeros(1, numitersub_ub+1);
    % d*(numitersub_ub+1) zero matrix; column k+1 stores the sub-iterate w^(t,k)
    w_candidate = zeros(d, numitersub_ub+1);
    % 1*numitersub_ub zero array; entry k stores the step size used in sub-iteration k
    eta_candidate = zeros(1, numitersub_ub);

    % index set I: the sub-iterations k in which the objective step was taken
    index_set = [];

    tic; % for recording CPU runtime
    % w^(t,0) is from the initialization or the previous outer iteration
    w_candidate(:,1) = w;
    IPP_SSG_time_set(end) = IPP_SSG_time_set(end) + toc; % add to the latest CPU-time entry

    % compute the value of the objective
    % F_t(w) = F(w) + rho_hat / 2 * norm(w-w^(t,0), 2)^2 at w = w^(t,0),
    % where the proximal term vanishes
    hinge_margin = 1 - b.*(A*w);
    obj_candidate(1,1) = dot( hinge_margin, hinge_margin > 0 ) / n + SCAD_coefficient * SCAD(w);

    tic; % for recording CPU runtime
    % compute mean_male = mean( sigmoid ( Ac_male * w ) )
    sigmoid_mean_male = mean( 1 ./ ( 1 + exp( - Ac(idmalec,:)*w ) ) );
    % compute mean_female = mean( sigmoid ( Ac_female * w ) )
    sigmoid_mean_female = mean( 1 ./ ( 1 + exp( - Ac(idfemalec,:)*w ) ) );
    % get the value of the constraint
    % G_t(w) = G(w) + rho_hat / 2 * norm(w-w^(t,0), 2)^2 at w = w^(t,0),
    % where the proximal term vanishes
    cons_candidate(1,1) = abs( sigmoid_mean_male - sigmoid_mean_female ) - kappa;
    IPP_SSG_time_set(end) = IPP_SSG_time_set(end) + toc; % add to the latest CPU-time entry

    fprintf('Iter=%i, Subiter=%i, obj=%f, cons=%f\n', t-1, 0, obj_candidate(1,1)+0, cons_candidate(1,1)+0);
    % the "+0" avoids the error "using fprintf: Function is not defined for sparse inputs"

    substep_count_ippssg = 0;

    for k = 1:numitersub_ub

        tic; % for recording CPU runtime

        % cons_candidate(1,k) is the constraint value at the current point
        % w = w^(t,k-1), stored in column k of w_candidate
        if cons_candidate(1,k)+0 < ippssg_tau % perform subgradient step on the objective function

            % objective subgradient: hinge-loss part + SCAD part + proximal part
            obj_subgrad = - ( (A').*b' ) * ( 1 - b.*(A*w) > 0 ) / n + SCAD_coefficient * SCAD_subgradient(w) ...
                          + rho_hat * ( w - w_candidate(:,1) );

            w = w - ippssg_eta_k * obj_subgrad; % update to w^(t,k)
            % w = min( 0.5*D_X / norm(w,2), 1 ) * w; % projection to the ball with radius as 0.5*D_X

            index_set = [index_set, k]; % add k to I

        else % perform subgradient step on the constraint function

            % constraint subgradient from "constraint_subgradient" + proximal part
            cons_subgrad = constraint_subgradient(Ac, idmalec, idfemalec, nummalec, numfemalec, w) ...
                           + rho_hat * ( w - w_candidate(:,1) );

            w = w - ippssg_eta_k * cons_subgrad; % update to w^(t,k)
            % w = min( 0.5*D_X / norm(w,2), 1 ) * w; % projection to the ball with radius as 0.5*D_X

        end

        % bookkeeping shared by both kinds of step
        w_candidate(:,k+1) = w;
        IPP_SSG_time_set(end+1) = IPP_SSG_time_set(end) + toc; % record CPU runtime of the step
        eta_candidate(1,k) = ippssg_eta_k;
        step_count_ippssg = step_count_ippssg + 1;
        substep_count_ippssg = substep_count_ippssg + 1;

        % compute the value of the objective
        % F_t(w^(t,k)) = F(w^(t,k)) + rho_hat / 2 * norm(w^(t,k)-w^(t,0), 2)^2
        hinge_margin = 1 - b.*(A*w);
        obj_candidate(1,k+1) = dot( hinge_margin, hinge_margin > 0 ) / n + SCAD_coefficient * SCAD(w) ...
                               + rho_hat / 2 * norm(w-w_candidate(:,1), 2)^2;

        tic; % for recording CPU runtime
        % compute mean_male = mean( sigmoid ( Ac_male * w ) )
        sigmoid_mean_male = mean( 1 ./ ( 1 + exp( - Ac(idmalec,:)*w ) ) );
        % compute mean_female = mean( sigmoid ( Ac_female * w ) )
        sigmoid_mean_female = mean( 1 ./ ( 1 + exp( - Ac(idfemalec,:)*w ) ) );
        % get the value of the constraint
        % G_t(w^(t,k)) = G(w^(t,k)) + rho_hat / 2 * norm(w^(t,k)-w^(t,0), 2)^2
        cons_candidate(1,k+1) = abs( sigmoid_mean_male - sigmoid_mean_female ) - kappa ...
                                + rho_hat / 2 * norm(w-w_candidate(:,1), 2)^2;
        % NOTE(review): this appends a second CPU-time entry for the same
        % sub-iteration (and one more is appended per outer iteration below), so
        % length(IPP_SSG_time_set) does not equal step_count_ippssg+1 -- confirm
        % that whatever consumes IPP_SSG_time_set expects this layout.
        IPP_SSG_time_set(end+1) = IPP_SSG_time_set(end) + toc; % record CPU runtime

        fprintf('Iter=%i, Subiter=%i, obj=%f, cons=%f\n', t-1, k, obj_candidate(1,k+1)+0, cons_candidate(1,k+1)+0);

    end

    tic; % for recording CPU runtime
    if isempty(index_set)
        % No sub-iterate passed the test cons < ippssg_tau, so the weighted
        % average below would be zeros(d,1)/0 = NaN and would poison every
        % later iterate. Fall back to the last sub-iterate w^(t,K) instead.
        bar_w = w;
    else
        % bar(w) = ( sum_{k in I} k * w^(t,k-1) ) / ( sum_{k in I} k ):
        % column k of w_candidate holds w^(t,k-1), the point at which the
        % objective step of sub-iteration k was taken; its weight is k
        k_candidate = 1:(substep_count_ippssg+1);
        k_candidate_index_set = k_candidate(index_set);
        w_candidate_index_set = w_candidate(:,index_set);
        bar_w = w_candidate_index_set*(k_candidate_index_set') / sum(k_candidate_index_set);
    end

    % w = w^(t) = w^(t+1,0) = bar(w)
    w = bar_w;
    IPP_SSG_w_candidate(:,t+1) = w;
    IPP_SSG_time_set(end+1) = IPP_SSG_time_set(end) + toc; % record CPU runtime

    if isempty(index_set)
        % reported outside the timed region so that it does not affect the CPU-time record
        warning('IPPSSG:emptyIndexSet', ...
                'Iter=%i: no sub-iterate had cons < ippssg_tau; using the last sub-iterate instead of the weighted average.', t-1);
    end

    % compute the value of the objective F(w^(t)) (without the proximal term)
    hinge_margin = 1 - b.*(A*w);
    obj = dot( hinge_margin, hinge_margin > 0 ) / n + SCAD_coefficient * SCAD(w);
    IPP_SSG_obj_candidate(1,t+1) = obj;

    % compute mean_male = mean( sigmoid ( Ac_male * w ) )
    sigmoid_mean_male = mean( 1 ./ ( 1 + exp( - Ac(idmalec,:)*w ) ) );
    % compute mean_female = mean( sigmoid ( Ac_female * w ) )
    sigmoid_mean_female = mean( 1 ./ ( 1 + exp( - Ac(idfemalec,:)*w ) ) );
    % get the value of the constraint G(w^(t)) (without the proximal term)
    cons = abs( sigmoid_mean_male - sigmoid_mean_female ) - kappa;
    IPP_SSG_cons_candidate(1,t+1) = cons;

    fprintf('Result of Iter=%i in ippssg: obj=%f, cons=%f\n', t-1, obj+0, cons+0);

end

% report the last outer iterate w^(T); the printed "iter" is the total number
% of subgradient steps performed (step_count_ippssg)
fprintf('Result of ippssg with last iteration: iter=%i, obj=%f, cons=%f\n', step_count_ippssg, obj+0, cons+0);