import numpy as np
import random
import time
import matplotlib.pyplot as plt
from scipy.optimize import minimize

# Configuration
# Problem size n: length of the optimization variable used throughout the script.
dimension = 500
# Seed NumPy's global random generator so the random draws below are reproducible.
np.random.seed(42)

# Objective function f_n(x)
def f(w):
    """Evaluate the objective f_n at the point ``w``.

    f_n(w) = 0.5 * w[0]**2
             + 0.5 * sum_{i} (w[i+1] - w[i])**2
             + 0.5 * w[-1]**2
             - w[0]

    The problem size is taken from ``w`` itself rather than from a module
    global, so the function works for any non-empty 1-D input.

    Args:
        w: 1-D array-like of coordinates (at least one element).

    Returns:
        The scalar objective value.
    """
    w = np.asarray(w)
    term1 = 0.5 * w[0] ** 2  # (x^{(1)})^2
    # Vectorized sum of squared consecutive differences; empty (0) when len(w) == 1.
    term2 = 0.5 * np.sum(np.diff(w) ** 2)
    term3 = 0.5 * w[-1] ** 2  # (x^{(n)})^2
    term4 = -w[0]  # -x^{(1)}
    return term1 + term2 + term3 + term4

# The gradient of f_n(x)
def grad_f(w):
    """Return the gradient of the objective f_n at ``w``.

    Component-wise (0-based indices, n = len(w)):
        grad[0]   = 2*w[0] - w[1] - 1
        grad[i]   = 2*w[i] - w[i-1] - w[i+1]      for 0 < i < n-1
        grad[n-1] = 2*w[n-1] - w[n-2]

    The last component comes from differentiating both
    0.5*(w[n-1] - w[n-2])**2 and 0.5*w[n-1]**2, so the two contributions add.

    Args:
        w: 1-D array-like of coordinates (at least one element).

    Returns:
        A float ``numpy.ndarray`` with the same length as ``w``.
    """
    w = np.asarray(w, dtype=float)
    # Every coordinate appears in two quadratic terms, giving the 2*w[i] diagonal part.
    grad = 2.0 * w
    grad[:-1] -= w[1:]   # coupling with the right neighbour
    grad[1:] -= w[:-1]   # coupling with the left neighbour
    grad[0] -= 1.0       # derivative of the linear term -w[0]
    return grad

# Helper functions
def random_vector_in_unit_ball(n):
    """Draw a random direction of length ``n`` with Euclidean norm 1.

    Each coordinate is sampled uniformly from [-1, 1) using NumPy's global
    random generator and the vector is then divided by its norm, so the result
    lies on the unit sphere. A zero-norm draw is returned unchanged to avoid
    dividing by zero.
    """
    direction = np.random.uniform(low=-1, high=1, size=n)
    length = np.linalg.norm(direction)
    if length == 0:
        return direction
    return direction / length

def alpha_t(t):
    """Return the step size 0.4 / t**0.51 for iteration counter ``t``."""
    decay = t ** 0.51
    return 0.4 / decay

# Scaled running minimum of gradient norms
def scaled_min_grad_norms(grad_norms, scaling_power=0.49):
    """Return the running minimum of ``grad_norms`` scaled by ``t**scaling_power``.

    Entry ``t`` (1-based) of the result is
    ``t**scaling_power * min(grad_norms[:t])``.

    Args:
        grad_norms: iterable of gradient norms in iteration order.
        scaling_power: exponent applied to the 1-based iteration index.

    Returns:
        A list with one scaled value per input element.
    """
    scaled = []
    best_so_far = float('inf')
    step = 0
    for norm in grad_norms:
        step += 1
        # Only a strictly smaller value replaces the current minimum.
        if norm < best_so_far:
            best_so_far = norm
        scaled.append((step ** scaling_power) * best_so_far)
    return scaled

# Algorithms with timing
def RGF(initial_theta, iterations, alpha_k=1 / 4, mu_k=10**-4):
    """Run a random-direction finite-difference descent and record its progress.

    Each step draws a unit-norm random direction ``s_k``, estimates the
    directional derivative of ``f`` with a forward difference of width
    ``mu_k`` and moves against it with step size ``alpha_k``.

    Exactly ``iterations`` updates are performed, matching GLD and STP, so the
    recorded histories of the three algorithms have the same length. The
    direction length follows ``initial_theta`` instead of a module global.

    Args:
        initial_theta: starting point (1-D array); it is not modified.
        iterations: number of update steps to perform.
        alpha_k: step size applied to the directional-derivative estimate.
        mu_k: finite-difference width.

    Returns:
        (grad_norms, times): per-step norm of ``grad_f`` at the new iterate and
        wall-clock seconds elapsed since the start of the run.
    """
    x_k = initial_theta
    n = len(initial_theta)
    grad_norms = []
    times = []
    start_time = time.time()
    for _ in range(iterations):
        s_k = random_vector_in_unit_ball(n)
        # Forward-difference numerator of the directional derivative along s_k.
        numerator = f(x_k + mu_k * s_k) - f(x_k)
        x_k = x_k - alpha_k * (numerator / mu_k) * s_k
        # The true gradient is used for reporting only, never for the update.
        grad = grad_f(x_k)
        grad_norms.append(np.linalg.norm(grad))
        times.append(time.time() - start_time)
    return grad_norms, times

def GLD(initial_theta, iterations, R=0.0001, r=0.00001):
    """Run a derivative-free search over geometrically shrinking radii.

    With ``K = int(log2(R / r))``, every iteration samples ``K`` Gaussian
    perturbations of the current point with scales ``R, R/2, ..., R/2**(K-1)``
    and moves to whichever of the current point and the samples has the
    smallest ``f`` value.

    Args:
        initial_theta: starting point (1-D array); it is not modified.
        iterations: number of iterations to perform.
        R: largest perturbation scale.
        r: lower scale bound used only to determine ``K``.

    Returns:
        (grad_norms, times): per-iteration norm of ``grad_f`` at the selected
        point and wall-clock seconds elapsed since the start of the run.
    """
    num_scales = int(np.log2(R / r))
    # The scales do not change between iterations, so build them once.
    scales = [2 ** (-k) * R for k in range(num_scales)]

    current = initial_theta
    norm_history, elapsed = [], []
    started = time.time()
    for _ in range(iterations):
        # The current point is candidate 0, so the objective never increases.
        candidates = [current]
        candidates.extend(
            current + scale * np.random.normal(0, 1, len(current))
            for scale in scales
        )
        objective_values = [f(candidate) for candidate in candidates]
        current = candidates[np.argmin(objective_values)]
        norm_history.append(np.linalg.norm(grad_f(current)))
        elapsed.append(time.time() - started)
    return norm_history, elapsed

def STP(initial_theta, iterations):
    """Run a three-point random search and record its progress.

    At iteration ``t`` a unit-norm random direction is drawn and the objective
    is evaluated at the current point shifted by ``+alpha_t(t)`` and
    ``-alpha_t(t)`` along it. The iterate moves to the better trial point only
    when the current value is not already less than or equal to both trials.

    Args:
        initial_theta: starting point (1-D array); it is not modified.
        iterations: number of iterations to perform.

    Returns:
        (grad_norms, times): per-iteration norm of ``grad_f`` at the iterate
        and wall-clock seconds elapsed since the start of the run.
    """
    point = initial_theta
    best_value = f(point)
    norm_history, elapsed = [], []
    started = time.time()
    for t in range(1, iterations + 1):
        direction = random_vector_in_unit_ball(len(initial_theta))
        step_size = alpha_t(t)
        forward = point + step_size * direction
        backward = point - step_size * direction
        forward_value = f(forward)
        backward_value = f(backward)

        stay_put = best_value <= min(forward_value, backward_value)
        if not stay_put:
            # Ties between the two trial points go to the forward one.
            if forward_value <= backward_value:
                point, best_value = forward, forward_value
            else:
                point, best_value = backward, backward_value

        norm_history.append(np.linalg.norm(grad_f(point)))
        elapsed.append(time.time() - started)
    return norm_history, elapsed

# Main configuration
iterations = 50000
n_repeats = 50

# Per-algorithm result stores: one entry per repetition.
stp_grad_norms, GLD_grad_norms, rgf_grad_norms = [], [], []
stp_times, GLD_times, rgf_times = [], [], []
stp_scaled_min_grad_norms, GLD_scaled_min_grad_norms, rgf_scaled_min_grad_norms = [], [], []

# Run the algorithms
# The order STP -> GLD -> RGF is kept so the shared global random stream is
# consumed in the same sequence on every repetition.
experiment_plan = (
    (STP, stp_grad_norms, stp_times, stp_scaled_min_grad_norms),
    (GLD, GLD_grad_norms, GLD_times, GLD_scaled_min_grad_norms),
    (RGF, rgf_grad_norms, rgf_times, rgf_scaled_min_grad_norms),
)
for run_index in range(n_repeats):
    print(run_index)  # progress indicator: 0-based repetition number
    # All three algorithms start from the same all-zeros point.
    initial_theta = np.zeros(dimension)
    for algorithm, norm_store, time_store, scaled_store in experiment_plan:
        run_norms, run_times = algorithm(initial_theta, iterations)
        norm_store.append(run_norms)
        time_store.append(run_times)
        scaled_store.append(scaled_min_grad_norms(run_norms))

import random
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter

# Choose a consistent color palette
colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b']

# Define delta_x and other necessary variables
delta_x = 0.05  # Adjust this delta to move x-labels spacing (not referenced in this section)


def _draw_iteration_panel(position, runs, color, title, ylabel, y_limits, transform=None):
    """Plot every run of one algorithm into cell ``position`` of the 2x3 grid.

    ``transform`` (if given) is applied to each truncated series before
    plotting. Labels, limits, ticks and grid are applied once per panel.
    """
    plt.subplot(2, 3, position)
    for run in runs[:n_repeats]:
        series = run[:iterations]
        if transform is not None:
            series = transform(series)
        plt.plot(series, color=color, alpha=0.7, linewidth=2)
    plt.xlabel(r'Iteration $T$', fontsize=22)
    plt.ylabel(ylabel, fontsize=22)
    plt.title(title, fontsize=24, fontweight='bold')
    plt.ylim(*y_limits)
    plt.tick_params(axis='both', labelsize=18)
    plt.grid(True, which='both', linestyle='--', linewidth=0.5, alpha=0.5)


# First set of plots (log of gradient norms)
plt.figure(figsize=(20, 12))  # Increase the figure width to spread out the subplots

# Find the min and max across all algorithms for consistent Y-axis scaling
all_grad_norms = np.concatenate(stp_grad_norms + GLD_grad_norms + rgf_grad_norms)
log_grad_min = np.min(np.log10(all_grad_norms))
log_grad_max = np.max(np.log10(all_grad_norms))

# Shared Y-range of the scaled running minima; computed once because it does
# not depend on which repetition is being drawn.
scaled_min_grad_all = np.concatenate(stp_scaled_min_grad_norms + GLD_scaled_min_grad_norms + rgf_scaled_min_grad_norms)
scaled_min_grad_min = np.min(scaled_min_grad_all)
scaled_min_grad_max = np.max(scaled_min_grad_all)

# Top row: raw gradient norms on a log10 scale
raw_panels = (
    ('STP Algorithm', colors[0], stp_grad_norms),
    ('GLD Algorithm', colors[1], GLD_grad_norms),
    ('RGF Algorithm', colors[2], rgf_grad_norms),
)
for position, (title, color, runs) in enumerate(raw_panels, start=1):
    _draw_iteration_panel(
        position, runs, color, title,
        r'$log(\|\nabla f(\theta^T)\|)$',
        (log_grad_min, log_grad_max),
        transform=np.log10,
    )

# Bottom row: scaled running minimum norms
scaled_panels = (
    ('STP Algorithm', colors[0], stp_scaled_min_grad_norms),
    ('GLD Algorithm', colors[1], GLD_scaled_min_grad_norms),
    ('RGF Algorithm', colors[2], rgf_scaled_min_grad_norms),
)
for position, (title, color, runs) in enumerate(scaled_panels, start=4):
    _draw_iteration_panel(
        position, runs, color, title,
        r'$T^{0.49} \cdot \min_{t \leq T} \|\nabla f(\theta^t)\|$',
        (scaled_min_grad_min, scaled_min_grad_max),
    )

# Adjust spacing between subplots
plt.subplots_adjust(wspace=0.4, hspace=0.3)  # Increase horizontal space to avoid overlap
plt.tight_layout()
plt.show()

# Second set of plots (log of gradient norms vs time)
plt.figure(figsize=(20, 6))  # Increase the figure width to spread out the subplots

# Find max time across all algorithms AND all repetitions to ensure the same
# x-axis range; looking only at the last repetition would clip any earlier
# run that happened to take longer.
max_time = max(
    np.max(run_times)
    for algorithm_times in (stp_times, GLD_times, rgf_times)
    for run_times in algorithm_times
)

time_panels = (
    ('STP Algorithm', colors[0], stp_times, stp_grad_norms),
    ('GLD Algorithm', colors[1], GLD_times, GLD_grad_norms),
    ('RGF Algorithm', colors[2], rgf_times, rgf_grad_norms),
)
for position, (title, color, times_per_run, norms_per_run) in enumerate(time_panels, start=1):
    plt.subplot(1, 3, position)
    # One curve per repetition: elapsed seconds against log10 of the gradient norm.
    for i in range(n_repeats):
        plt.plot(
            times_per_run[i][:iterations],
            np.log10(norms_per_run[i][:iterations]),
            color=color, alpha=0.7, linewidth=2,
        )
    # Axis decoration is identical for every run, so apply it once per panel.
    plt.xlabel(r'Time t (seconds)', fontsize=22)
    plt.ylabel(r'$log(\|\nabla f(\theta^t)\|)$', fontsize=22)
    plt.title(title, fontsize=24, fontweight='bold')
    plt.ylim(log_grad_min, log_grad_max)
    plt.xlim(0, max_time)
    plt.tick_params(axis='both', labelsize=18)
    plt.grid(True, which='both', linestyle='--', linewidth=0.5, alpha=0.5)

# Adjust spacing between subplots
plt.subplots_adjust(wspace=0.4, hspace=0.3)  # Increase horizontal space to avoid overlap
plt.tight_layout()
plt.show()