function [f_vec,g_vec,time_vec,x,acc_vec] = PB_APG(fun_f,grad_f,grad_g,fun_g,TSA,param,x0)
%PB_APG Accelerated projected gradient iteration on g + eta*f.
%
%   Note that PB-APG is identical to R-APM if we choose
%   the penalty parameter = 1/(regularization parameter in R-APM).
%
%   Each iteration takes a gradient step on g(y) + eta*f(y) from the
%   extrapolated point y, maps the result through ProjectOntoL2Ball0(.,lam),
%   and then forms the next extrapolated point with the FISTA-type
%   momentum sequence t_{k+1} = (1 + sqrt(1 + 4*t_k^2))/2.
%
%   Inputs
%     fun_f, fun_g   - function handles, evaluated at the iterate x after
%                      every iteration; each must return a scalar.
%     grad_f, grad_g - function handles, evaluated at the extrapolated
%                      point y; must return arrays compatible with x0.
%     TSA            - function handle evaluated at x after every
%                      iteration (test set accuracy); must return a scalar.
%     param          - struct with fields
%                        L_f, L_g : used in the step size
%                                   1/(L_f*eta + L_g)
%                                   (presumably gradient Lipschitz
%                                   constants of f and g -- TODO confirm)
%                        lam      : second argument of ProjectOntoL2Ball0
%                        maxiter  : maximum number of iterations
%                        maxtime  : elapsed-time budget (seconds, via toc)
%     x0             - starting point.
%
%   Outputs
%     f_vec, g_vec   - column vectors of fun_f(x), fun_g(x) per iteration.
%     time_vec       - column vector of elapsed time (toc) per iteration,
%                      measured from the start of the loop.
%     x              - last iterate.
%     acc_vec        - column vector of TSA(x) per iteration.
%
%   NOTE(review): the timer is never paused, so time_vec also contains the
%   cost of the fun_f/fun_g/TSA evaluations of earlier iterations.

eta = 1e-4;                               % hard-coded weight on f
% gamma = 1.5e-4;                         % alternative fixed step size
gamma = 1/(param.L_f*eta+param.L_g);      % step size for g + eta*f
lambda = param.lam;

maxiter = param.maxiter;
maxtime = param.maxtime;

% Preallocate the histories instead of growing them by concatenation on
% every iteration. The capacity starts small and doubles on demand so that
% a very large (or infinite) maxiter does not allocate huge buffers.
cap = max(1, min(floor(maxiter), 1024));
f_vec    = zeros(cap,1);
g_vec    = zeros(cap,1);
time_vec = zeros(cap,1);
acc_vec  = zeros(cap,1);
n_done = 0;                               % number of completed iterations

x = x0;
y = x0;
t = 1;

%% algorithm
tic;
for k = 1 : maxiter
    x_prev = x;
    % Descent step
    x = y - gamma*(grad_g(y)+eta*grad_f(y));
    % Projection step
%     x = ProjectOntoL1Ball(x,lambda);    % alternative constraint set
    x = ProjectOntoL2Ball0(x,lambda);
    % Momentum update and extrapolation
    t_prev = t;
    t = 0.5 + sqrt(0.25+t^2);
    y = x + (t_prev-1)*(x-x_prev)/t;

    cpu_t = toc;

    % Double the buffers when they are full.
    if k > cap
        pad = zeros(cap,1);
        f_vec    = [f_vec;pad];
        g_vec    = [g_vec;pad];
        time_vec = [time_vec;pad];
        acc_vec  = [acc_vec;pad];
        cap = 2*cap;
    end

    f_vec(k) = fun_f(x);
    g_vec(k) = fun_g(x);
    time_vec(k) = cpu_t;
    % test set accuracy
    acc_vec(k) = TSA(x);
    n_done = k;

    if cpu_t>maxtime
        break
    end
end

% Drop the unused tail of the preallocated buffers.
f_vec    = f_vec(1:n_done);
g_vec    = g_vec(1:n_done);
time_vec = time_vec(1:n_done);
acc_vec  = acc_vec(1:n_done);