% Problem setup shared by every method benchmarked in this script.
n = 1e2;            % dimension parameter; the iterates below have length 2*n
L = 1e2;            % Hessian Lipschitz smoothness parameter

% Assemble A = U*S*V' from two factors returned by RandOrthMat (defined
% elsewhere; presumably random orthogonal matrices, second argument
% presumably a tolerance -- confirm) and a diagonal S whose k-th entry
% is 1/20^(k/n).
U = RandOrthMat(n, 1e-15);
V = RandOrthMat(n, 1e-15);
diag_entries = zeros(n, 1);
for k = 1:n
    diag_entries(k) = 1./(20^(k./n));
end
S = diag(diag_entries);
A = U*S*transpose(V);

b = randn(n, 1);

% Common starting point of all methods, and the squared gradient norm at
% that point, which is used to normalise every reported residual.
initial_point = 1e3*randn(2*n, 1);
norm_1 = norm(loss_gradient(A, b, L, initial_point))^2;

iterations = 2e3;   % number of iterations run by each method

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Adaptive Second Order Method with Parameter L
%
% Runs `iterations` steps from initial_point and records, per iteration,
%   result_1_x : accumulated wall-clock time of the update steps
%   result_1_y : ||F(z_T)||^2 / ||F(initial_point)||^2, where z_T is the
%                a-weighted running average of the iterates.
% Both traces are row vectors of length iterations + 1; entry 1 is the
% starting point (time 0, relative residual 1).

z_0 = initial_point;    % previous iterate
z_1 = initial_point;    % current iterate
z_T = 0;                % weighted average of the iterates (scalar 0 until the first update)

a_0 = 0;                % previous step weight
sum_a = 0;              % running sum of the step weights
eta = L;

% Preallocate the traces instead of growing them inside the loop.
result_1_x = zeros(1, iterations + 1);
result_1_y = zeros(1, iterations + 1);
result_1_y(1) = 1;
time = 0.0;
gradient = loss_gradient(A, b, L, z_0);

for i = 1:iterations
    t_start = tic;
    gradient_new = loss_gradient(A, b, L, z_1);
    % Error of the linearisation of the gradient around the previous iterate.
    % NOTE(review): loss_hessian at z_0 was already evaluated (as the Hessian
    % at z_1) in the previous iteration; caching it would save one Hessian
    % evaluation per iteration but would change the measured timings.
    e = gradient_new - gradient - loss_hessian(A, L, z_0)*(z_1 - z_0);
    temp = sqrt(2)*(eta^2)*L*a_0*norm(e);
    a_1 = 1./(temp + sqrt(temp^2 + 2*sqrt(2)*(eta^2)*L*norm(gradient_new)));
    % Regularised Newton-type step with the correction term a_0*e.
    z_2 = z_1 - ((1./eta)*eye(2*n) + a_1*loss_hessian(A, L, z_1))\(a_1*gradient_new + a_0*e);
    z_0 = z_1;
    z_1 = z_2;
    z_T = (z_T*sum_a + a_1*z_2)./(sum_a + a_1);
    sum_a = sum_a + a_1;
    a_0 = a_1;
    gradient = gradient_new;
    t_end = toc(t_start);
    time = time + t_end;
    % Bookkeeping below is intentionally outside the timed region.
    result_1_x(i + 1) = time;
    result_1_y(i + 1) = norm(loss_gradient(A, b, L, z_T))^2./norm_1;
end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Parameter-free Adaptive Second Order Method
%
% Same iteration structure as the method with parameter L, but eta is
% adapted on the fly from the observed linearisation error instead of
% being derived from L. Records, per iteration,
%   result_2_x : accumulated wall-clock time of the update steps
%   result_2_y : ||F(z_T)||^2 / ||F(initial_point)||^2 for the weighted
%                average z_T of the iterates.
% Both traces are row vectors of length iterations + 1; entry 1 is the
% starting point (time 0, relative residual 1).

z_0 = initial_point;    % previous iterate
z_1 = initial_point;    % current iterate
z_T = 0;                % weighted average of the iterates (scalar 0 until the first update)

a_0 = 0;                % previous step weight
sum_a = 0;              % running sum of the step weights
eta = 3e-3;             % initial value of the adaptive parameter; only ever decreased below

% Preallocate the traces instead of growing them inside the loop.
result_2_x = zeros(1, iterations + 1);
result_2_y = zeros(1, iterations + 1);
result_2_y(1) = 1;
time = 0.0;
gradient = loss_gradient(A, b, L, z_0);

for i = 1:iterations
    t_start = tic;
    gradient_new = loss_gradient(A, b, L, z_1);
    % Error of the linearisation of the gradient around the previous iterate.
    e = gradient_new - gradient - loss_hessian(A, L, z_0)*(z_1 - z_0);
    % On the first iteration z_1 == z_0, so the ratio is 0/0 = NaN (assuming
    % loss_gradient is deterministic); min ignores NaN and eta is unchanged.
    eta = min(eta, norm(z_1 - z_0)^2./(2*norm(e)));
    temp = eta*a_0*norm(e);
    a_1 = 0.5./(temp + sqrt(temp^2 + eta*norm(gradient_new)));
    % Regularised Newton-type step with the correction term a_0*e.
    z_2 = z_1 - ((1./eta)*eye(2*n) + a_1*loss_hessian(A, L, z_1))\(a_1*gradient_new + a_0*e);
    z_0 = z_1;
    z_1 = z_2;
    z_T = (z_T*sum_a + a_1*z_2)./(sum_a + a_1);
    sum_a = sum_a + a_1;
    a_0 = a_1;
    gradient = gradient_new;
    t_end = toc(t_start);
    time = time + t_end;
    % Bookkeeping below is intentionally outside the timed region.
    result_2_x(i + 1) = time;
    result_2_y(i + 1) = norm(loss_gradient(A, b, L, z_T))^2./norm_1;
end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Homotopy Inexact Proximal-Newton Extragradient Method
%
% Records, per iteration,
%   HIPNEX_x : accumulated wall-clock time of the update steps
%   HIPNEX_y : relative squared gradient norm w.r.t. the starting point
% Both traces are row vectors of length iterations + 1; entry 1 is the
% starting point (time 0, relative residual 1).

time = 0.0;
% Preallocate the traces instead of growing them inside the loop.
HIPNEX_x = zeros(1, iterations + 1);
HIPNEX_y = zeros(1, iterations + 1);
HIPNEX_y(1) = 1;

x = initial_point;
y = initial_point;
v = 0;
% Algorithm constants, all derived from sigma and L.
sigma = 0.25;
theta = 0.5*(1 - sigma)*(1 - 2*sigma);
theta_hat = theta*(sigma./(1 - sigma) + theta./(1 - sigma)^2);
eta = 4*theta_hat./L;
tau = 2*(theta - theta_hat)./(2*theta + eta*L./2 + sqrt((2*theta + eta*L./2)^2 - 4*theta*(theta - theta_hat)));
lambda = sqrt(theta./(L*norm(loss_gradient(A, b, L, initial_point))));

for i = 1:iterations
    t_start = tic;
    gradient = loss_gradient(A, b, L, y);
    % Exact stationarity at y: nothing left to update. The original code
    % appended to an undefined variable here and skipped the time trace;
    % record both traces so they keep the same length as the other methods.
    if all(gradient + v == 0)
        time = time + toc(t_start);
        HIPNEX_x(i + 1) = time;
        HIPNEX_y(i + 1) = norm(gradient)^2./norm_1;
        continue
    end
    if 0.5*lambda*L*norm(lambda*(gradient + v) + y - x) <= theta_hat
        % Current (y, v) is accurate enough: keep it.
        y_new = y;
        v_new = v;
    else
        % Newton step on the proximal subproblem centred at x.
        y_new = y - (eye(2*n) + lambda*loss_hessian(A, L, y))\(lambda*gradient + y - x);
        % NOTE(review): v is set to the new iterate itself; confirm this is
        % the intended update for v.
        v_new = y_new;
    end
    if lambda*norm(y_new - x) >= eta
        % Large step: move x (extragradient step) and shrink lambda.
        x_new = x - tau*lambda*(loss_gradient(A, b, L, y_new) + v_new);
        lambda_new = (1 - tau)*lambda;
    else
        % Small step: keep x and enlarge lambda.
        x_new = x;
        lambda_new = lambda./(1 - tau);
    end
    x = x_new;
    y = y_new;
    v = v_new;
    lambda = lambda_new;
    t_end = toc(t_start);
    time = time + t_end;
    % Bookkeeping below is intentionally outside the timed region.
    HIPNEX_x(i + 1) = time;
    HIPNEX_y(i + 1) = norm(loss_gradient(A, b, L, x))^2./norm_1;
end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Generalized Optimistic Second Order Method
%
% Each outer iteration performs a backtracking search on the step size eta
% and then accepts the resulting point. Records, per outer iteration,
%   SO2_x : accumulated wall-clock time (including the backtracking)
%   SO2_y : ||F(z)||^2 / ||F(initial_point)||^2 at the accepted point
% Both traces are row vectors of length iterations + 1; entry 1 is the
% starting point (time 0, relative residual 1).

time = 0.0;
% Preallocate the traces instead of growing them inside the loop.
SO2_x = zeros(1, iterations + 1);
SO2_y = zeros(1, iterations + 1);
SO2_y(1) = 1;

mu = 0;         % not referenced in the visible part of this script
sigma = 1;      % initial trial step size of the next outer iteration
alpha = 0.5;    % constant in the step acceptance test
beta = 0.5;     % backtracking shrink factor

z = initial_point;
v = zeros(2*n, 1);                  % correction term carried between iterations
F = loss_gradient(A, b, L, z);

for i = 1:iterations
    t_start = tic;
    eta = sigma;
    J = loss_hessian(A, L, z);
    flag = false;
    counter = 0;

    % Backtracking: try eta = sigma first, then shrink by beta until the
    % linearisation error at the trial point passes the acceptance test.
    while ~flag
        if counter >0
            eta = beta*eta;
        end
        direction = (eta*J + eye(2*n))\(-eta*F - v);
        z_new = z + direction;
        approx_first = F + J*(z_new - z);       % linear model of F at z_new
        distance = norm(direction);
        F_new = loss_gradient(A, b, L, z_new);
        res_new = F_new - approx_first;
        flag = eta*norm(res_new) <= 1/2*alpha*distance;
        counter = counter + 1;
    end

    z = z_new;
    F = F_new;
    v = res_new*eta;
    sigma = eta/beta;   % start the next search one shrink factor above the accepted step
    norm_F = norm(F_new);
    t_end = toc(t_start);
    time = time + t_end;
    % Bookkeeping below is intentionally outside the timed region.
    SO2_x(i + 1) = time;
    SO2_y(i + 1) = norm_F^2./norm_1;

end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Plot the Figures
%
% Relative squared gradient norm (log scale) against accumulated time for
% the four methods, with a marker on every 50th sample.

% Option lists shared by the curves and by the text objects.
curve_opts = {'MarkerIndices', 1:50:iterations, 'LineWidth', 3};
text_opts = {'Interpreter', 'latex', 'fontsize', 20};

figure
semilogy(result_1_x, result_1_y, 'r-*', curve_opts{:});
grid on
hold on
semilogy(result_2_x, result_2_y, 'k-*', curve_opts{:});
semilogy(HIPNEX_x, HIPNEX_y, 'b-*', curve_opts{:});
semilogy(SO2_x, SO2_y, 'g-*', curve_opts{:});
legend({'Adaptive SOM I', 'Adaptive SOM II', 'HIPNEX', 'Optimal SOM'}, text_opts{:}, 'Location', 'northeast')

% Fixed axis window: first 0.6 seconds, residuals between 1e-25 and 1e1.
xlim([0 0.6])
ylim([1e-25 1e1])
xticks(0:0.06:0.6)
ax = gca;
ax.FontSize = 15;
xlabel('Time(s)', text_opts{:})
ylabel('$\frac{\|F(z_T)\|^2}{\|F(z_0)\|^2}$', text_opts{:})
set(gcf, 'position', [0, 0, 600, 400])
hold off