% Problem setup shared by every method below: dimension, smoothness
% constant, problem data (A, b), starting point and iteration budget.
% NOTE: no random seed is set here, so each run draws a new instance.
n = 100;  % dimension; the iterates used below have 2*n entries

L = 100;  % Hessian Lipschitz smoothness parameter

% A = U*S*V.' where S is diagonal with k-th entry 1/20^(k/n), i.e. the
% entries decrease from 20^(-1/n) down to 1/20.
% RandOrthMat is defined elsewhere; presumably it returns an n-by-n random
% orthogonal matrix built to the given tolerance - confirm against its source.
U = RandOrthMat(n, 1e-15);
V = RandOrthMat(n, 1e-15);
S = eye(n);
for k = 1:n
    S(k, k) = 1/(20^(k/n));
end
A = U*S*V.';

b = randn(n, 1);

% Starting point with standard-normal entries scaled by 1000.
initial_point = 1000*randn(2*n, 1);

% Squared gradient norm at the starting point; every reported value below
% is divided by this number.
initial_gradient = loss_gradient(A, b, L, initial_point);
norm_1 = norm(initial_gradient)^2;

iterations = 1000;  % number of iterations run by each method

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Adaptive Second Order Method with Parameter L

% Runs the adaptive second-order method with eta fixed to L and records
% result_1, a 1-by-(iterations+1) row: result_1(1) = 1 and result_1(i+1) is
% norm(F(z_T))^2 / norm_1 after i iterations, where z_T is the running
% average of the iterates weighted by the a_1 values.
%
% Each gradient/Hessian is evaluated once and carried to the next iteration
% instead of being recomputed (assumes loss_gradient and loss_hessian are
% deterministic functions of their arguments), and result_1 is preallocated.

z_0 = initial_point;  % previous iterate
z_1 = initial_point;  % current iterate
z_T = 0;              % weighted average of the iterates produced so far

a_0 = 0;              % weight used in the previous iteration
sum_a = 0;            % sum of all weights so far
eta = L;

identity_2n = eye(2*n);
grad_0 = loss_gradient(A, b, L, z_0);  % gradient at the previous iterate
hess_0 = loss_hessian(A, L, z_0);      % Hessian at the previous iterate

result_1 = ones(1, iterations + 1);

for i = 1:iterations
    grad_1 = loss_gradient(A, b, L, z_1);
    hess_1 = loss_hessian(A, L, z_1);

    % Error of the first-order expansion around z_0, evaluated at z_1.
    e = grad_1 - grad_0 - hess_0*(z_1 - z_0);

    temp = sqrt(2)*(eta^2)*L*a_0*norm(e);
    a_1 = 1./(temp + sqrt(temp^2 + 2*sqrt(2)*(eta^2)*L*norm(grad_1)));

    % Regularized Newton-type step that also corrects with the previous error.
    z_2 = z_1 - ((1./eta)*identity_2n + a_1*hess_1)\(a_1*grad_1 + a_0*e);

    % Shift the (iterate, gradient, Hessian) triple by one iteration.
    z_0 = z_1;
    grad_0 = grad_1;
    hess_0 = hess_1;
    z_1 = z_2;

    % sum_a is 0 on the first pass, so z_T simply becomes z_2 there.
    z_T = (z_T*sum_a + a_1*z_2)./(sum_a + a_1);
    sum_a = sum_a + a_1;
    a_0 = a_1;

    result_1(i + 1) = norm(loss_gradient(A, b, L, z_T))^2./norm_1;
end


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Parameter-free Adaptive Second Order Method

% Runs the parameter-free adaptive second-order method: eta starts at 1e-3
% and is only ever decreased, using the observed expansion error e.
% Records result_2, a 1-by-(iterations+1) row: result_2(1) = 1 and
% result_2(i+1) is norm(F(z_T))^2 / norm_1 after i iterations, where z_T is
% the running average of the iterates weighted by the a_1 values.
%
% Each gradient/Hessian is evaluated once and carried to the next iteration
% instead of being recomputed (assumes loss_gradient and loss_hessian are
% deterministic functions of their arguments), and result_2 is preallocated.

z_0 = initial_point;  % previous iterate
z_1 = initial_point;  % current iterate
z_T = 0;              % weighted average of the iterates produced so far

a_0 = 0;              % weight used in the previous iteration
sum_a = 0;            % sum of all weights so far
eta = 1e-3;

identity_2n = eye(2*n);
grad_0 = loss_gradient(A, b, L, z_0);  % gradient at the previous iterate
hess_0 = loss_hessian(A, L, z_0);      % Hessian at the previous iterate

result_2 = ones(1, iterations + 1);

for i = 1:iterations
    grad_1 = loss_gradient(A, b, L, z_1);
    hess_1 = loss_hessian(A, L, z_1);

    % Error of the first-order expansion around z_0, evaluated at z_1.
    e = grad_1 - grad_0 - hess_0*(z_1 - z_0);
    norm_e = norm(e);

    % Shrink eta to norm(z_1 - z_0)^2/(2*norm(e)) when that is smaller.
    % When norm_e is 0 (always the case on the first pass, where z_1 == z_0)
    % the ratio is 0/0 = NaN or x/0 = Inf; min() would leave eta unchanged
    % in both cases, so the update is skipped explicitly instead.
    if norm_e > 0
        eta = min(eta, norm(z_1 - z_0)^2./(2*norm_e));
    end

    temp = eta*a_0*norm_e;
    a_1 = 0.5./(temp + sqrt(temp^2 + eta*norm(grad_1)));

    % Regularized Newton-type step that also corrects with the previous error.
    z_2 = z_1 - ((1./eta)*identity_2n + a_1*hess_1)\(a_1*grad_1 + a_0*e);

    % Shift the (iterate, gradient, Hessian) triple by one iteration.
    z_0 = z_1;
    grad_0 = grad_1;
    hess_0 = hess_1;
    z_1 = z_2;

    % sum_a is 0 on the first pass, so z_T simply becomes z_2 there.
    z_T = (z_T*sum_a + a_1*z_2)./(sum_a + a_1);
    sum_a = sum_a + a_1;
    a_0 = a_1;

    result_2(i + 1) = norm(loss_gradient(A, b, L, z_T))^2./norm_1;
end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Homotopy Inexact Proximal-Newton Extragradient Method

% Runs the HIPNEX baseline and records HIPNEX, a 1-by-(iterations+1) row
% whose first entry is 1 and whose later entries are squared gradient norms
% divided by norm_1.
%
% The gradient at y is evaluated once per iteration and reused (assumes
% loss_gradient is a deterministic function of its arguments), the
% "residual is exactly zero" test is written with all(), and HIPNEX is
% preallocated instead of being grown inside the loop.

HIPNEX = ones(1, iterations + 1);
x = initial_point;
y = initial_point;
v = 0;  % scalar here; it is added to the gradient vector element-wise

% Algorithm constants, all derived from sigma and L.
sigma = 0.25;
theta = 0.5*(1 - sigma)*(1 - 2*sigma);
theta_hat = theta*(sigma./(1 - sigma) + theta./(1 - sigma)^2);
eta = 4*theta_hat./L;
tau = 2*(theta - theta_hat)./(2*theta + eta*L./2 + sqrt((2*theta + eta*L./2)^2 - 4*theta*(theta - theta_hat)));
lambda = sqrt(theta./(L*norm(loss_gradient(A, b, L, initial_point))));

identity_2n = eye(2*n);

for i = 1:iterations
    grad_y = loss_gradient(A, b, L, y);
    residual = grad_y + v;

    % Residual is exactly zero in every component: nothing left to update,
    % so only the current value is recorded.
    if all(residual == 0)
        HIPNEX(i + 1) = norm(grad_y)^2./norm_1;
        continue
    end

    if 0.5*lambda*L*norm(lambda*residual + y - x) <= theta_hat
        % Current pair (y, v) already passes the test; keep it.
        y_new = y;
        v_new = v;
    else
        % Newton step on the regularized system centered at x.
        y_new = y - (identity_2n + lambda*loss_hessian(A, L, y))\(lambda*grad_y + y - x);
        % NOTE(review): v is overwritten with the new iterate y_new although
        % the step above does not involve v; for an unconstrained problem one
        % would expect this term to stay zero. Behavior kept as-is - confirm
        % against the HIPNEX algorithm statement.
        v_new = y_new;
    end

    if lambda*norm(y_new - x) >= eta
        % Large step: move x and shrink lambda.
        x_new = x - tau*lambda*(loss_gradient(A, b, L, y_new) + v_new);
        lambda_new = (1 - tau)*lambda;
    else
        % Small step: keep x and enlarge lambda.
        x_new = x;
        lambda_new = lambda./(1 - tau);
    end

    x = x_new;
    y = y_new;
    v = v_new;
    lambda = lambda_new;

    % NOTE(review): the value recorded here is taken at x, while the
    % early-exit branch above records it at y - confirm which iterate is
    % the intended output.
    HIPNEX(i + 1) = norm(loss_gradient(A, b, L, x))^2./norm_1;
end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Generalized Optimistic Second Order Method

% Runs the optimistic second-order method with a backtracking search on the
% step size eta and records SO2, a 1-by-(iterations+1) row: SO2(1) = 1 and
% SO2(it+1) is norm(F(z))^2 / norm_1 after it iterations.
%
% The backtracking loop now stops with an error if eta has underflowed to 0
% and the acceptance test still fails: in that state nothing changes between
% passes, so the original loop would never terminate. SO2 is preallocated and
% the identity matrix is built once.

SO2 = ones(1, iterations + 1);
sigma = 1;     % first step size tried in the next outer iteration
alpha = 0.5;   % acceptance-test constant
beta = 0.5;    % factor applied to eta on each rejected trial

z = initial_point;
v = zeros(2*n, 1);  % correction term carried over from the previous iteration
F = loss_gradient(A, b, L, z);

identity_2n = eye(2*n);

for it = 1:iterations
    eta = sigma;
    J = loss_hessian(A, L, z);
    flag = false;
    counter = 0;
    while ~flag
        % Every trial after the first uses a smaller step size.
        if counter > 0
            eta = beta*eta;
        end
        direction = (eta*J + identity_2n)\(-eta*F - v);
        z_new = z + direction;
        approx_first = F + J*(z_new - z);  % linear model of F at z_new
        distance = norm(direction);
        F_new = loss_gradient(A, b, L, z_new);
        res_new = F_new - approx_first;    % error of the linear model
        flag = eta*norm(res_new) <= 1/2*alpha*distance;
        counter = counter + 1;

        % With eta == 0 every further trial is identical, so a failed test
        % here can never succeed (this happens when values are NaN/Inf).
        if ~flag && eta == 0
            error('SO2:lineSearchFailed', ...
                'Backtracking failed at iteration %d: eta underflowed to 0 (non-finite values?).', it);
        end
    end

    z = z_new;
    F = F_new;
    v = res_new*eta;
    sigma = eta/beta;  % start the next search one step above the accepted eta
    norm_F = norm(F_new);
    SO2(it + 1) = norm_F^2./norm_1;

end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Plot the Figures

% Plots the four recorded curves on a logarithmic y-axis.
% Entry k of each result vector is the value after k-1 iterations, so the
% x-coordinates run from 0 to iterations. (Starting them at 1 would shift
% every curve right by one and push the last point outside the x-limits.)
% The x-limits and ticks follow `iterations` instead of a hard-coded 1000.

iter = 0:iterations;

figure
semilogy(iter, result_1, 'r-*', 'MarkerIndices', 1:50:iterations, 'LineWidth', 3);
grid on
hold on
semilogy(iter, result_2, 'k-*', 'MarkerIndices', 1:50:iterations, 'LineWidth', 3);
semilogy(iter, HIPNEX, 'b-*', 'MarkerIndices', 1:50:iterations, 'LineWidth', 3);
semilogy(iter, SO2, 'g-*', 'MarkerIndices', 1:50:iterations, 'LineWidth', 3);
legend({'Adaptive SOM I', 'Adaptive SOM II', 'HIPNEX', 'Optimal SOM'},'Interpreter','latex','fontsize',20,'Location','northeast')
xlim([0 iterations])
ylim([1e-25 1e1])
xticks(linspace(0, iterations, 11))  % 0:100:1000 when iterations is 1000
ax = gca;
ax.FontSize = 15;
xlabel('Number of iterations $T$','Interpreter','latex','fontsize',20)
ylabel('$\frac{\|F(z_T)\|^2}{\|F(z_0)\|^2}$','Interpreter','latex','fontsize',20)
set(gcf,'position',[0,0,600,400])
hold off