function test_AccerlatedProximalGradient
% Driver that compares three accelerated proximal gradient solvers on a
% random lasso instance:
%
%       min_x  1/2 * ||A*x - y||^2  +  lambda * ||x||_1
%
% written as F(x) = f(x) + h(x) with f smooth and h non-smooth.  At a point
% xt with gradient g = grad f(xt), each solver minimises the model
%       0.5*L*||x - xt||^2 + <x - xt, g> + h(x)
% which is the same as the proximal mapping
%       arg min_x  0.5*L*||x - (xt - g/L)||^2 + h(x).
%
% The objective histories of the three solvers are plotted in one figure and
% the differences between their best objective values are printed.

clear;
clc;
close all;

% Fix both legacy generator seeds so that every run draws the same data.
rand('seed',0);
randn('seed',0);

% Problem data: A is m-by-n (more unknowns than equations).
m = 100;
n = 1000;
A = randn(m,n);
A(1,3) = 1000;          % one very large entry in the data matrix
y = randn(m,1);
lambda = 0.2;

% Callbacks handed to the solvers.
HandleObjSmooth    = @(x)computeObj(x,A,y);              % x -> [f, grad f]
HandleObjNonSmooth = @(x)lambda*sum(abs(x));             % x -> h(x)
HandleProx         = @(a,theta)computeprox(a,theta,lambda);

x   = zeros(n,1);       % common starting point
tot = 1e-12;            % tolerance passed to every solver

[~, his1] = AccerlatedProximalGradient(x,HandleObjSmooth,HandleObjNonSmooth,HandleProx,tot);
[~, his2] = AccerlatedProximalGradientMonotone(x,HandleObjSmooth,HandleObjNonSmooth,HandleProx,tot);

% The third solver is additionally given a Lipschitz estimate.
Lip = norm(A*A');
[~, his3] = AccerlatedProximalGradientMonotone2(x,HandleObjSmooth,HandleObjNonSmooth,HandleProx,tot,Lip);

figure;
plot(1:length(his1),his1, 1:length(his2),his2, 1:length(his3),his3)
legend('first','second','third');

% Printed on purpose (no semicolon): gap between the best objective values.
min(his1)-min(his2)
min(his3)-min(his2)

function [x,histroy]=AccerlatedProximalGradientMonotone2(x,HandleObjSmooth,HandleObjNonSmooth,HandleProx,tot,Lip)
% Monotone accelerated proximal gradient method for
%
%       min_x  f(x) + g(x),      f smooth, g non-smooth.
%
% Every outer iteration restarts the step-size search from L = Lip and keeps
% halving L for as long as the quadratic upper-bound test still holds, so the
% step 1/L is as long as that test allows.
%
% Inputs
%   x                  starting point
%   HandleObjSmooth    x -> [fobj, grad]   value and gradient of f
%   HandleObjNonSmooth x -> fobj           value of g
%   HandleProx         (a, theta) -> arg min_x 0.5*theta*||x - a||^2 + g(x)
%                      (called below with the point first, then the weight)
%   tot                stop when the mean relative objective change over the
%                      last 10 iterations drops below this value
%   Lip                first trial value of L in every backtracking search
%
% Outputs
%   x        final iterate; its objective value never increases
%   histroy  histroy(1) is the initial objective, histroy(k+1) the objective
%            of x after iteration k
%
% Example (lasso):
%   A = randn(100,1000);  y = randn(100,1);  lambda = 0.2;
%   HandleObjSmooth    = @(x)computeObj(x,A,y);
%   HandleObjNonSmooth = @(x)lambda*sum(abs(x));
%   HandleProx         = @(a,theta)computeprox(a,theta,lambda);
%   [x, his] = AccerlatedProximalGradientMonotone2(zeros(1000,1), ...
%       HandleObjSmooth,HandleObjNonSmooth,HandleProx,1e-12,norm(A*A'));

maxiter = 1000;
memory  = 10;           % length of the stopping-test window
max_in  = 100000000;    % cap on backtracking trials per outer iteration

y = x;
alpha = 1;
HandleObj = @(u)HandleObjSmooth(u)+ HandleObjNonSmooth(u);
histroy(1) = HandleObj(x);
stop_seq_f = [];

for iter=1:maxiter

    [f_old,g_old] = HandleObjSmooth(y);

    % Backtracking: L holds the last value that passed the test, L_try the
    % candidate being examined.  If even L_try = Lip fails, 2*Lip is used.
    L     = 2*Lip;
    L_try = Lip;
    for in=1:max_in
        z = HandleProx(y - g_old/L_try,L_try);
        v = z - y;
        r_sum = mdot(v,v);
        if (r_sum <= 1e-100)
            % z coincides with y, so the test below can never fail and
            % halving would only drive L_try to zero.  Accept and stop.
            L = L_try;
            break;
        end
        f_new = HandleObjSmooth(z);
        l_sum = f_new - f_old - mdot(g_old,v);
        % Non-finite values mean the trial step overflowed; treat that as a
        % failed trial instead of halving further.
        if (~isfinite(r_sum) || ~isfinite(l_sum) || l_sum > 0.5*r_sum*L_try)
            break;
        end
        L     = L_try;
        L_try = L_try/2;
    end

    % Proximal gradient step with the accepted L.
    z = HandleProx(y - g_old/L,L);

    alphap = alpha; alpha = (1+sqrt(4*alpha*alpha +1))/2;

    % Monotone safeguard: move x only when z lowers the objective.
    x_old  = x;
    fx_old = HandleObj(x);
    fz     = HandleObj(z);
    if(fz<fx_old)
        x = z;
    end

    y = x + (alphap/alpha)*(z-x) + ((alphap-1)/alpha)*(x-x_old);

    histroy(iter+1)=min(fx_old,fz);

    % Sliding window with the last `memory` relative objective changes.
    FDiff = abs(histroy(iter) - histroy(iter+1)) / (1+ abs((histroy(iter))));
    stop_seq_f(end+1) = FDiff;
    if numel(stop_seq_f) > memory
        stop_seq_f(1) = [];
    end

    % Stop once progress has stalled, but only if the start was improved on.
    last_k_ftol=mean(stop_seq_f);
    if (last_k_ftol <tot)
        if(histroy(end)<histroy(1))
            break;
        end
    end
end
function [x,histroy]=AccerlatedProximalGradientMonotone(x,HandleObjSmooth,HandleObjNonSmooth,HandleProx,tot)
% Monotone accelerated proximal gradient method for
%
%       min_x  f(x) + g(x),      f smooth, g non-smooth.
%
% The step-size parameter L starts at 1 and is doubled by backtracking until
% the quadratic upper-bound test holds; it is never decreased again.
%
% Inputs
%   x                  starting point
%   HandleObjSmooth    x -> [fobj, grad]   value and gradient of f
%   HandleObjNonSmooth x -> fobj           value of g
%   HandleProx         (a, theta) -> arg min_x 0.5*theta*||x - a||^2 + g(x)
%                      (called below with the point first, then the weight)
%   tot                stop when the mean relative objective change over the
%                      last 10 iterations drops below this value
%
% Outputs
%   x        final iterate
%   histroy  histroy(1) is the initial objective, histroy(k+1) is
%            min(objective of previous x, objective of candidate z) after
%            iteration k
%
% Example (lasso):
%   A = randn(100,1000);  y = randn(100,1);  lambda = 0.2;
%   HandleObjSmooth    = @(x)computeObj(x,A,y);
%   HandleObjNonSmooth = @(x)lambda*sum(abs(x));
%   HandleProx         = @(a,theta)computeprox(a,theta,lambda);
%   [x, his] = AccerlatedProximalGradientMonotone(zeros(1000,1), ...
%       HandleObjSmooth,HandleObjNonSmooth,HandleProx,1e-12);

maxiter = 1000;     % outer iteration cap
max_in  = 1000;     % backtracking cap per outer iteration
memory  = 10;       % window length of the stopping test

L     = 1;
y     = x;
alpha = 1;
HandleObj  = @(u)HandleObjSmooth(u)+ HandleObjNonSmooth(u);
histroy(1) = HandleObj(x);

for iter = 1:maxiter

    [f_old,g_old] = HandleObjSmooth(y);

    % Backtracking: double L until the quadratic model majorises f at z.
    in = 0;
    while in < max_in
        in = in + 1;
        z = HandleProx(y - g_old/L,L);
        step  = z - y;
        f_new = HandleObjSmooth(z);
        if (f_new - f_old - mdot(g_old,step)) <= 0.5*mdot(step,step)*L
            break;
        end
        L = 2*L;
    end

    if in == max_in
        fprintf('warning! Lipschitz too large!');
    end

    % Momentum parameter update.
    alphap = alpha;
    alpha  = (1+sqrt(4*alphap*alphap +1))/2;

    % Monotone safeguard: accept z only if it lowers the objective.
    x_old  = x;
    fx_old = HandleObj(x);
    fz     = f_new + HandleObjNonSmooth(z);
    if fz < fx_old
        x = z;
    end

    % Extrapolated point for the next gradient evaluation.
    y = x + (alphap/alpha)*(z-x) + ((alphap-1)/alpha)*(x-x_old);

    histroy(iter+1) = min(fx_old,fz);

    % Keep the last `memory` relative objective changes.
    FDiff = abs(histroy(iter) - histroy(iter+1)) / (1+ abs(histroy(iter)));
    if iter > memory
        stop_seq_f = [stop_seq_f(2:end) FDiff];
    else
        stop_seq_f(iter) = FDiff;
    end

    % Stop once progress has stalled and the start has been improved on.
    last_k_ftol = mean(stop_seq_f);
    if (last_k_ftol < tot)
        if (histroy(end) < histroy(1))
            break;
        end
    end
end


function [fobj,grad] = computeObj(x,A,y)
% Least-squares term  f(x) = 1/2 * ||A*x - y||_F^2  and its gradient.
%
%   x     point of evaluation (column vector, or matrix with one column per
%         right-hand side)
%   A, y  data matrix and target, sized so that A*x - y is defined
%
%   fobj  objective value
%   grad  gradient A'*(A*x - y); only computed when it is requested
%
% The Frobenius norm is used so that the value agrees with the gradient when
% x is a matrix; for a vector it is the ordinary 2-norm.
resid = A*x - y;
fobj  = 0.5*norm(resid,'fro')^2;
if nargout > 1
    grad = A'*resid;
end

function [x] = computeprox(a,theta,lambda)
% Proximal step for the l1 penalty:
%   x = arg min_x  0.5*theta*||x - a||^2 + lambda*sum(abs(x))
% Dividing the objective by theta turns this into soft-thresholding of a
% with threshold lambda/theta.
threshold = lambda/theta;
x = threadholding_l1(a,threshold);

function [x] = threadholding_l1(a,lambda)
% Element-wise soft-thresholding, i.e. the solution of
%   min_x  0.5*||x - a||^2 + lambda*sum(abs(x))
% Each entry is shrunk towards zero by lambda; entries whose magnitude does
% not exceed lambda become zero.
shrunk = max(0,abs(a)-lambda);
x = sign(a).*shrunk;



function [x_best,histroy]=AccerlatedProximalGradient(x,HandleObjSmooth,HandleObjNonSmooth,HandleProx,tot)
% Accelerated proximal gradient method (FISTA-style, with backtracking) for
%
%       min_x  f(x) + g(x),      f smooth, g non-smooth.
%
% Inputs
%   x                  starting point
%   HandleObjSmooth    x -> [fobj, grad]   value and gradient of f
%   HandleObjNonSmooth x -> fobj           value of g
%   HandleProx         (a, theta) -> arg min_x 0.5*theta*||x - a||^2 + g(x)
%                      (called below with the point first, then the weight)
%   tot                stop when the mean relative objective change over the
%                      last 10 iterations drops below this value
%
% Outputs
%   x_best   iterate with the lowest objective value seen (the method itself
%            is not monotone)
%   histroy  histroy(1) is the initial objective, histroy(k+1) the objective
%            of the iterate produced by iteration k
%
% Example (lasso):
%   A = randn(100,1000);  y = randn(100,1);  lambda = 0.2;
%   HandleObjSmooth    = @(x)computeObj(x,A,y);
%   HandleObjNonSmooth = @(x)lambda*sum(abs(x));
%   HandleProx         = @(a,theta)computeprox(a,theta,lambda);
%   [x, his] = AccerlatedProximalGradient(zeros(1000,1), ...
%       HandleObjSmooth,HandleObjNonSmooth,HandleProx,1e-12);

maxiter = 1000;     % outer iteration cap
max_in  = 10000;    % backtracking cap per outer iteration
memory  = 10;       % window length of the stopping test

L = 1;              % step-size parameter, only ever doubled

xxp   = zeros(size(x));   % difference between the two latest iterates
alpha = 1;
s     = x;                % search point where the gradient is evaluated
x_best = x;
f_best = HandleObjSmooth(x)+ HandleObjNonSmooth(x);
histroy(1)=f_best;

for iter=1:maxiter
    [f_old,g_old] = HandleObjSmooth(s);
    xp = x;

    % Backtracking: double L until the quadratic model majorises f at x.
    for in=1:max_in
        x = HandleProx(s-g_old/L,L);
        v = x-s;
        r_sum = mdot(v,v);
        f_new = HandleObjSmooth(x);
        l_sum = f_new - f_old - mdot(g_old,v);
        if (r_sum <=1e-100)
            % the proximal step barely moved away from s
            break;
        end
        if(l_sum <= 0.5*r_sum*L)
            break;
        else
            L=2*L;
        end
        if(in==max_in)
            fprintf('warning! Lipschitz too large!');
        end
    end

    % Momentum: extrapolate along the step just taken.  xxp must be
    % refreshed BEFORE it is used, otherwise s is built from the previous
    % iteration's difference.
    alphap=alpha; alpha= (1+ sqrt(4*alpha*alpha +1))/2;
    beta=(alphap-1)/alpha;
    xxp = x-xp;
    s = x + beta* xxp;

    f_curr = f_new  + HandleObjNonSmooth(x);
    if(f_curr<f_best)
        f_best = f_curr;
        x_best = x;
    end

    histroy(iter+1)=f_curr;

    % Keep the last `memory` relative objective changes.
    FDiff = abs(histroy(iter) - histroy(iter+1)) / (1+ abs((histroy(iter))));
    if iter <= memory
        stop_seq_f(iter) = FDiff;
    else
        stop_seq_f  = [stop_seq_f(2:end) FDiff];
    end

    % Stop once progress has stalled and the start has been improved on.
    last_k_ftol=mean(stop_seq_f);
    if (last_k_ftol <tot)
        if(histroy(end)<histroy(1))
            break;
        end
    end
end


function [r] = mdot(x,y)
% Inner product of two equally sized arrays: the element-wise products are
% summed first along the columns and then across the column totals.
products  = x.*y;
colTotals = sum(products);
r = sum(colTotals);








function [x_best,histroy]=AccerlatedProximalGradient2(x,HandleObjSmooth,HandleObjNonSmooth,HandleProx,tot,A)
% Accelerated proximal gradient method with a constant step 1/L and constant
% momentum, where L and mu are the largest and smallest eigenvalue of A'*A.
%
% Inputs
%   x                  starting point
%   HandleObjSmooth    x -> [fobj, grad]   value and gradient of the smooth part
%   HandleObjNonSmooth x -> fobj           value of the non-smooth part
%   HandleProx         (a, theta) -> arg min_x 0.5*theta*||x - a||^2 + g(x)
%                      (called below with the point first, then the weight)
%   tot                stop when the mean relative objective change over the
%                      last 10 iterations drops below this value
%   A                  matrix whose Gram matrix A'*A supplies L and mu
%
% Outputs
%   x_best   iterate with the lowest objective value seen
%   histroy  histroy(1) is the initial objective, histroy(k+1) the objective
%            of the iterate produced by iteration k

maxiter = 1000;
memory  = 10;           % window length of the stopping test

eigH = eig(A'*A);
L  = max(eigH);
% Round-off can push the smallest eigenvalue of a singular Gram matrix just
% below zero; clamp it so that sqrt(mu/L) stays real.
mu = max(min(eigH),0);

alpha    = sqrt(mu/L);
momentum = (1-alpha)/(1+alpha);

x_bar  = x;
x_best = x;
f_best = HandleObjSmooth(x)+ HandleObjNonSmooth(x);
histroy(1)=f_best;

for iter=1:maxiter
    % Proximal gradient step from the extrapolated point x_bar.
    [~,g_old] = HandleObjSmooth(x_bar);
    x_old = x;
    x     = HandleProx(x_bar - g_old/L,L);
    x_bar = x + momentum*(x-x_old);

    f_curr = HandleObjSmooth(x) + HandleObjNonSmooth(x);
    histroy(iter+1)=f_curr;

    % Remember the best iterate; the objective need not decrease every step.
    if(f_curr<f_best)
        f_best = f_curr;
        x_best = x;
    end

    % Keep the last `memory` relative objective changes.
    FDiff = abs(histroy(iter) - histroy(iter+1)) / (1+ abs((histroy(iter))));
    if iter <= memory
        stop_seq_f(iter) = FDiff;
    else
        stop_seq_f  = [stop_seq_f(2:end) FDiff];
    end

    % Stop once progress has stalled and the start has been improved on.
    last_k_ftol=mean(stop_seq_f);
    if (last_k_ftol <tot)
        if(histroy(end)<histroy(1))
            break;
        end
    end
end
