# NAG vs explicit weak discrete gradient scheme

import numpy as np
import sympy
import scipy
from scipy import optimize
import matplotlib.pyplot as plt

# parameters of the objective function, and the initial condition
class Param:
    """Objective selector together with its parameters and starting point.

    ``mode`` chooses the objective evaluated by ``f`` / ``f_grad``; the
    meaning of ``A`` and ``init`` depends on it:

    * 0 -- quadratic ``0.5 x.A x + b.x``; ``A = [matrix, vector]``.
    * 1 -- ``||Ax||^2 + 3 sin^2(b.x)``; ``A = [matrix, vector]``.
    * 2 -- discretised Cahn-Hilliard energy;
      ``A = [length, number of grid points, q]``, ``init`` is 0 or 1 and
      selects one of two built-in initial profiles.
    * 3 -- quadratic with the ``n x n`` Hilbert matrix; ``A = n``.
    * 4 -- "lower bound" chain function; ``A = [L, dimension]``.
    * 5 -- ``sqrt(1 + x.A x)``; ``A`` is the matrix.
    * 6 -- log-sum-exp with seeded random data;
      ``A = [dimension, number of terms]``.
    * 7 -- quartic ``0.25 * (a0 x0^4 + a1 x1^4)``; ``A = [array([a0, a1])]``.

    Attributes set for every handled mode: ``mode`` and ``init``.  Modes 0
    and 1 additionally set ``mu`` / ``L`` (smallest / largest eigenvalue of
    the matrix) and ``fopt``; mode 7 sets ``L`` and ``fopt``; mode 4 sets
    ``L`` and ``N``.  Other modes do not define ``fopt``, ``mu`` or ``L``.
    """

    def __init__(self,mode,A,init):
        self.mode = mode    #mode 0: quadratic function, 1:||Ax||^2 + 3sin^2(bx), 2:Cahn-Hilliard
        if mode in (0, 1):
            self.A = A[0]
            self.b = A[1]
            # np.array() makes a real copy.  Basic slicing (init[:]) of an
            # ndarray only returns a view, so solvers that update their
            # iterate in place would silently overwrite the caller's array.
            self.init = np.array(init)
            w, _ = np.linalg.eig(self.A)
            self.mu = min(w)
            self.L = max(w)
            # value of 0.5 x.A x + b.x at its stationary point x = -A^{-1} b
            self.fopt = -0.5*self.b@np.linalg.inv(self.A)@self.b

        elif mode == 2:  #Cahn-Hilliard
            L = A[0];   N = A[1]
            x = np.linspace(0,L,N) # grid
            self.dx = L/(N - 1)
            self.dxinv = 1/self.dx
            self.q = A[2]
            self.grid = x
            if init == 0:
                # smooth profile going from -1 at x=0 to +1 at x=L
                self.init = -np.ones_like(x) + 2*x/L + np.sin(2*np.pi*x/L)
            if init == 1:
                # zero everywhere except the two end points
                self.init = np.zeros_like(x)
                self.init[0] = -1
                self.init[-1] = 1

        elif mode == 3:  #Hilbert matrix
            n = A
            # A[i][j] = 1/(i+j+1), built by broadcasting instead of a
            # double Python loop (same values, far cheaper for large n)
            idx = np.arange(n)
            self.A = 1/(idx[:, None] + idx[None, :] + 1)
            self.init = np.ones(n)

        elif mode == 4:  #lower bound
            self.L = A[0]
            self.N = A[1]
            self.init = np.ones(self.N)
            self.init[3] += 3   # requires N >= 4

        elif mode == 5:  #sqrt(1+x**2)
            self.init = np.array(init)  # copy, see mode 0
            self.A = A

        elif mode == 6:
            n = A[0]
            s = A[1]
            self.N = n
            # fixed seed: the random problem data is reproducible
            np.random.seed(25566)
            self.a = np.random.randn(s,n)
            self.b = np.random.randn(s)
            self.init = np.random.randn(n)

        elif mode == 7:
            self.init = np.array(init)  # copy, see mode 0
            self.A = A[0]
            self.L = 3*max(self.A[0]*self.init[0]**2,self.A[1]*self.init[1]**2)
            self.fopt = 0


# objective function
def f(param,x):
    """Evaluate the objective selected by ``param.mode`` at the point ``x``.

    Parameters
    ----------
    param : object exposing ``mode`` and the per-mode attributes read below
        (``A``/``b`` for modes 0 and 1, ``dx``/``dxinv``/``q`` for mode 2,
        ``A`` for modes 3, 5 and 7, ``L`` for mode 4, ``a``/``b`` for mode 6).
    x : 1-D array, the evaluation point.

    Returns
    -------
    The scalar objective value, or ``None`` when ``param.mode`` is not one
    of 0..7.
    """
    mode = param.mode

    if mode == 0:   # 0.5 x.A x + b.x
        return 0.5 * x @ param.A @ x.T + param.b @ x.T

    if mode == 1:   # ||Ax||^2 + 3 sin^2(b.x)
        # The matrix lives in param.A and the vector in param.b; indexing
        # param.A[0] / param.A[1] would pick single rows of the matrix.
        Ax = param.A @ x
        return Ax @ Ax + 3*np.sin(param.b @ x)**2

    if mode == 2:   # discretised Cahn-Hilliard energy
        res = 0
        # double-well potential
        res += np.sum(0.25 * x**4 - 0.5 * x**2) * param.dx
        # squared forward differences over the whole grid
        res += 0.5 * param.q * np.linalg.norm(x[:-1] - x[1:])**2 * param.dxinv
        # the first and last differences are counted once more
        res += 0.5 * param.q * ( (x[1] - x[0])**2 + (x[-1] - x[-2])**2 ) * param.dxinv
        return res

    if mode == 3:   # 0.5 x.A x
        return 0.5 * x @ param.A @ x.T

    if mode == 4:   # chain function scaled by L/4
        res = 0
        res += 0.5 * (x[0]**2 + x[-1]**2)
        res += 0.5 * np.linalg.norm(x[:-1] - x[1:])**2
        res -= x[0]
        res = 0.25 * param.L * res
        return res

    if mode == 5:   # sqrt(1 + x.A x)
        return np.sqrt(1.0 + x@param.A@x.T)

    if mode == 6:   # log(sum_i exp(a_i.x - b_i))
        # imported here because the module never imports logsumexp itself
        from scipy.special import logsumexp
        return logsumexp(x @ param.a.T - param.b)

    if mode == 7:   # 0.25 (a0 x0^4 + a1 x1^4)
        return 0.25* (param.A[0]*x[0]**4 + param.A[1]*x[1]**4)
    

# gradient
def f_grad(param,x):
    """Evaluate the gradient of the objective selected by ``param.mode``.

    Parameters
    ----------
    param : object exposing ``mode`` and the per-mode attributes read below
        (same attributes as the objective ``f`` uses for that mode).
    x : 1-D array, the evaluation point.

    Returns
    -------
    1-D array with the gradient at ``x``, or ``None`` when ``param.mode``
    is not one of 0..7.
    """
    mode = param.mode

    if mode == 0:
        return x @ param.A + param.b

    if mode == 1:
        # d/dx ||Ax||^2 = 2 A^T A x
        # d/dx 3 sin^2(b.x) = 3 sin(2 b.x) * b   (chain rule keeps the b)
        return 2 * param.A.T @ (param.A @ x) + 3*np.sin(2 * (param.b @ x)) * param.b

    if mode == 2:
        res = np.zeros_like(x)
        # only interior nodes receive a gradient; both end entries stay 0
        res[1:-1] += (x[1:-1]**3 - x[1:-1]) * param.dx
        res[1:-1] += param.q * 2 * x[1:-1] * param.dxinv
        res[1:-1] -= param.q * x[2:] * param.dxinv
        res[1:-1] -= param.q * x[:-2] * param.dxinv
        # contribution of the doubly counted first / last differences
        res[1] += param.q * (x[1] - x[0]) * param.dxinv
        res[-2] += param.q * (x[-2] - x[-1]) * param.dxinv
        return res

    if mode == 3:
        return x @ param.A

    if mode == 4:
        res = np.zeros_like(x)
        res += 2 * x
        res[:-1] -= x[1:]
        res[1:] -= x[:-1]
        res[0] -= 1
        res = 0.25 * param.L * res
        return res

    if mode == 5:
        # use the matrix stored on param, not a module-level global
        return (x / np.sqrt(1.0 + x@param.A@x.T))@param.A

    if mode == 6:
        z = x @ param.a.T - param.b
        w = z - max(z)      # shift by the maximum before exponentiating
        n = np.exp(w)
        d = np.sum(n)
        return n @ param.a / d

    if mode == 7:
        return np.array([param.A[0]*x[0]**3, param.A[1]*x[1]**3])

# plot
def plt_res(param,k,res):
    """Plot ``res`` against ``k`` on the global axes ``ax1`` in colour ``clr``.

    The value is shifted by a mode-dependent offset before plotting; modes
    without an entry below draw nothing.
    """
    mode = param.mode
    if mode == 0:       # quadratic function: plotted as is
        shifted = res
    elif mode == 2:     # Cahn-Hilliard
        shifted = res+0.15596839595482387
    elif mode == 3:     # Hilbert matrix
        shifted = res-1.1446791085863936e-09
    elif mode == 4:     # lower bound
        shifted = res - 0.125 * param.L * (-1 + 1 / (param.N + 1))
    elif mode == 5:     # sqrt(1+x**2)
        shifted = res - 1
    else:
        return
    ax1.plot(k,shifted, marker='.', color = clr)

# Recording interval: the solver functions below append to their history
# lists only on iterations k with k % k_skip == 0.
k_skip = 1  # interval for plotting

def NAG_sc(param,h_0,maxitr):
    """Nesterov's accelerated gradient iteration, strongly convex variant.

    Starts from ``param.init`` with step size ``h_0**2`` and the constant
    momentum ``(1 - sqrt(mu*s)) / (1 + sqrt(mu*s))``, where ``mu`` is
    ``param.mu``.  Stops after ``maxitr`` iterations or as soon as
    ``f(x) - param.fopt`` drops below 1e-12.

    Returns ``(k, hist_k, hist_f, hist_x0, hist_x1)``: the number of
    iterations performed and the recorded iteration numbers, objective
    gaps and first two coordinates (one sample every ``k_skip`` steps,
    plus the starting point).
    """
    point = param.init
    lookahead = point[:]
    hist_k = [0]
    hist_f = [f(param,point)-param.fopt]
    hist_x0 = [point[0]]
    hist_x1 = [point[1]]

    tol = 1e-12
    step = h_0 ** 2
    k = 0
    for _ in range(maxitr):
        grad = f_grad(param,point)
        if f(param,point) - param.fopt < tol:
            break
        k += 1

        # gradient step, then extrapolation with constant momentum
        previous = lookahead
        lookahead = point - step * grad
        root = np.sqrt(param.mu * step)
        point = lookahead + (1 - root) / (1 + root) * (lookahead - previous)

        if k % k_skip == 0:
            gap = f(param,point) - param.fopt
            hist_k.append(k)
            hist_f.append(gap)
            hist_x0.append(point[0])
            hist_x1.append(point[1])

    return k,hist_k,hist_f,hist_x0,hist_x1

def NAG_c(param,h_0,maxitr):
    """Nesterov's accelerated gradient iteration, convex variant.

    Starts from ``param.init`` with step size ``h_0**2`` and the
    iteration-dependent momentum ``k / (k + 3)``.  Stops after ``maxitr``
    iterations or as soon as ``f(x) - param.fopt`` drops below 1e-12.

    Returns ``(k, hist_k, hist_f, hist_x0, hist_x1)``: the number of
    iterations performed and the recorded iteration numbers, objective
    gaps and first two coordinates (one sample every ``k_skip`` steps,
    plus the starting point).
    """
    tol = 1e-12
    pos = param.init
    grad_pt = pos[:]
    hist_k = [0]
    hist_f = [f(param,pos)-param.fopt]
    hist_x0 = [pos[0]]
    hist_x1 = [pos[1]]

    lr = h_0 ** 2
    k = 0
    for _ in range(maxitr):
        g = f_grad(param,pos)
        gap = f(param,pos) - param.fopt
        if gap < tol:
            break
        k += 1

        # keep the previous gradient-step point for the extrapolation
        old, grad_pt = grad_pt, pos - lr * g
        pos = grad_pt + k / (k+3) * (grad_pt - old)

        if k % k_skip == 0:
            gap = f(param,pos) - param.fopt
            hist_k.append(k)
            hist_f.append(gap)
            hist_x0.append(pos[0])
            hist_x1.append(pos[1])

    return k,hist_k,hist_f,hist_x0,hist_x1

def wDG_sc(param,h_0,maxitr):
    """Weak discrete gradient scheme for strongly convex objectives.

    Each iteration forms an intermediate point ``y`` from ``x`` and ``v``,
    updates ``v`` using the gradient at ``y`` scaled by ``1/param.mu``,
    then relaxes ``x`` towards the new ``v``; ``h_0`` is the step size.
    Stops after ``maxitr`` iterations or once ``f(x) - param.fopt`` is
    below 1e-12.

    Returns ``(k, hist_k, hist_f, hist_x0, hist_x1)``.
    """
    hist_k, hist_f, hist_x0, hist_x1 = [], [], [], []

    def record(step, point):
        # one history sample: iteration, objective gap, first two coordinates
        hist_k.append(step)
        hist_f.append(f(param,point)-param.fopt)
        hist_x0.append(point[0])
        hist_x1.append(point[1])

    x = param.init
    v = x[:]
    record(0, x)

    tol = 1e-12
    h = h_0
    k = 0
    for _ in range(maxitr):
        if f(param,x) - param.fopt < tol:
            break
        k += 1

        y = ((1+h)*x + h*v) / (1+2*h)
        v = (v + h*(y - f_grad(param,y)/param.mu)) / (1+h)
        x = (x + h*v) / (1+h)

        if k % k_skip == 0:
            record(k, x)

    return k,hist_k,hist_f,hist_x0,hist_x1

def wDG_sc_noy(param,h_0,maxitr):
    """Strongly convex weak discrete gradient scheme without the point ``y``.

    Same structure as the scheme with an intermediate point, but the
    gradient is evaluated directly at the current ``x``.  ``h_0`` is the
    step size; iteration stops after ``maxitr`` steps or once
    ``f(x) - param.fopt`` is below 1e-12.

    Returns ``(k, hist_k, hist_f, hist_x0, hist_x1)``.
    """
    tol = 1e-12
    h = h_0
    state = param.init
    aux = state[:]

    hist_k = [0]
    hist_f = [f(param,state)-param.fopt]
    hist_x0, hist_x1 = [state[0]], [state[1]]

    k = 0
    for _ in range(maxitr):
        gap = f(param,state) - param.fopt
        if gap < tol:
            break
        k += 1

        # gradient taken at the current iterate itself
        aux = (aux + h*(state - f_grad(param,state)/param.mu)) / (1+h)
        state = (state + h*aux) / (1+h)

        if k % k_skip == 0:
            gap = f(param,state) - param.fopt
            hist_k.append(k)
            hist_f.append(gap)
            hist_x0.append(state[0])
            hist_x1.append(state[1])

    return k,hist_k,hist_f,hist_x0,hist_x1

def wDG_c(param,h_0,maxitr):
    """Weak discrete gradient scheme for convex objectives.

    With ``w = (2k+1)/(k+1)^2`` at iteration ``k``: ``z`` is ``x`` moved
    towards ``v`` by the weight ``w``, ``v`` takes a gradient step at ``z``
    of size ``(2k+1) h^2 / 4``, and the new ``x`` is
    ``k^2/(k+1)^2 * x + w * v``.  ``h_0`` is the step size ``h``.  Stops
    after ``maxitr`` iterations or once ``f(x) - param.fopt`` is below 1e-12.

    Returns ``(k, hist_k, hist_f, hist_x0, hist_x1)``.
    """
    x = param.init
    v = x[:]
    hist_k = [0]
    hist_f = [f(param,x)-param.fopt]
    hist_x0 = [x[0]]
    hist_x1 = [x[1]]

    tol = 1e-12
    h = h_0
    k = 0
    for _ in range(maxitr):
        if f(param,x) - param.fopt < tol:
            break
        k += 1

        weight_v = (2*k+1)/(k+1)**2
        weight_x = k*k/(k+1)**2
        z = x + weight_v *(v - x)
        v = v - (2*k+1)*h*h/4*f_grad(param,z)
        x = weight_x*x + weight_v *v

        if k % k_skip == 0:
            hist_k.append(k)
            hist_f.append(f(param,x) - param.fopt)
            hist_x0.append(x[0])
            hist_x1.append(x[1])

    return k,hist_k,hist_f,hist_x0,hist_x1

def pRK(param,h_0,maxitr):
    """Three-stage ``pRK`` iteration using ``param.mu`` as gradient scaling.

    Per iteration: ``z`` mixes ``x`` and ``v`` with half a step, ``v`` is
    updated with the gradient at ``z`` divided by ``param.mu``, and ``x``
    is ``z`` moved half a step towards the new ``v``.  ``h_0`` is the step
    size.  Stops after ``maxitr`` iterations or once
    ``f(x) - param.fopt`` is below 1e-12.

    Returns ``(k, hist_k, hist_f, hist_x0, hist_x1)``.
    """
    hist_k, hist_f, hist_x0, hist_x1 = [], [], [], []

    def record(step, point):
        # one history sample: iteration, objective gap, first two coordinates
        hist_k.append(step)
        hist_f.append(f(param,point)-param.fopt)
        hist_x0.append(point[0])
        hist_x1.append(point[1])

    x = param.init
    v = x[:]
    record(0, x)

    tol = 1e-12
    h = h_0
    k = 0
    for _ in range(maxitr):
        if f(param,x) - param.fopt < tol:
            break
        k += 1

        half = 0.5*h
        z = (x + half*v) / (1+half)
        v = (v + h*(z - f_grad(param,z)/param.mu)) / (1+h)
        x = z + half*(v - z)

        if k % k_skip == 0:
            record(k, x)

    return k,hist_k,hist_f,hist_x0,hist_x1

def pRK2(param,h_0,maxitr):
    """Second ``pRK`` variant with a correction from the change in ``v``.

    With ``c = (1 - h/2) h``: ``z = (x + c v) / (1 + c)``, then ``v`` is
    updated with the gradient at ``z`` divided by ``param.mu``, and
    ``x = z + h^2/2 (v - z) + c (v - v_old)``.  ``h_0`` is the step size
    ``h``.  Stops after ``maxitr`` iterations or once
    ``f(x) - param.fopt`` is below 1e-12.

    Returns ``(k, hist_k, hist_f, hist_x0, hist_x1)``.
    """
    x = param.init
    v = x[:]
    v_old = np.zeros_like(v)
    hist_k = [0]
    hist_f = [f(param,x)-param.fopt]
    hist_x0 = [x[0]]
    hist_x1 = [x[1]]

    tol = 1e-12
    h = h_0
    k = 0
    for _ in range(maxitr):
        gap = f(param,x) - param.fopt
        if gap < tol:
            break
        k += 1

        v_old = v
        c = (1-0.5*h)*h
        z = (x + c*v) / (1+c)
        v = (v + h*(z - 0.5*v - f_grad(param,z)/param.mu)) / (1+0.5*h)
        x = z + 0.5*h*h*(v - z) + c*(v-v_old)

        if k % k_skip == 0:
            gap = f(param,x) - param.fopt
            hist_k.append(k)
            hist_f.append(gap)
            hist_x0.append(x[0])
            hist_x1.append(x[1])

    return k,hist_k,hist_f,hist_x0,hist_x1

# Itoh--Abe equation for mode0
def equ_IA_m0(y_i,param,h,i,x):
    """Residual of the coordinate-wise Itoh--Abe equation for the quadratic.

    For the objective ``0.5 x.A x + b.x`` (mode 0) and coordinate ``i`` the
    residual is

        (y_i - x_i)/h + 0.5*A_ii*(y_i + x_i) + sum_{j != i} A_ij*x_j + b_i

    and the new value of coordinate ``i`` is the root in ``y_i``.

    Parameters
    ----------
    y_i : candidate new value of coordinate ``i``.
    param : object exposing the matrix ``A`` and the vector ``b``.
    h : step size.
    i : index of the coordinate being updated (any valid index; previously
        only 0 and 1 were handled and other indices returned ``None``).
    x : current point; entries other than ``i`` are used as they are.
    """
    row = param.A[i]
    # coupling with all other coordinates; in two dimensions this is the
    # single off-diagonal term A[i][1-i] * x[1-i]
    coupling = sum(row[j]*x[j] for j in range(len(x)) if j != i)
    return (y_i-x[i])/h + 0.5*row[i]*(y_i+x[i]) + coupling + param.b[i]

# Itoh--Abe equation
def equ_IA(y_i,param,h,i,x):
    """Residual of the Itoh--Abe equation for a general objective ``f``.

    Builds the point that equals ``x`` except for coordinate ``i``, which is
    replaced by ``y_i``, and returns

        1/h + (f(new point) - f(x)) / (y_i - x_i)**2

    Note that the expression is undefined at ``y_i == x[i]``.
    """
    # np.concatenate rejects zero-dimensional inputs, so a scalar y_i must
    # be promoted to a length-1 array first
    yx_n = np.concatenate([x[:i],np.atleast_1d(y_i),x[i+1:]])
    return 1/h + (f(param,yx_n) - f(param,x))/(y_i- x[i])**2

# Itoh--Abe for gradient flow
def wDG_IA(param,h_0,maxitr):
    """Coordinate-wise Itoh--Abe discrete gradient iteration (gradient flow).

    In every iteration the coordinates of ``x`` are updated one after the
    other; each new value is the root of ``equ_IA_m0`` found by
    ``optimize.root_scalar`` inside the bracket ``[-20, 20]``.  Because
    ``equ_IA_m0`` is written for the quadratic objective, this solver is
    meant for mode 0.

    Parameters
    ----------
    param : problem definition (``init``, ``fopt`` and whatever
        ``equ_IA_m0`` reads).
    h_0 : step size.
    maxitr : maximum number of iterations.

    Returns
    -------
    ``(k, hist_k, hist_f, hist_x0, hist_x1)``: iterations performed and the
    recorded iteration numbers, objective gaps and first two coordinates.
    """
    # Work on a float copy: the coordinates are overwritten in place below,
    # and slicing an ndarray (init[:]) would only give a view of param.init.
    x = np.array(param.init, dtype=float)
    k = 0   # iteration
    tol = 1e-12  # tolerance
    hist_k = [0]
    hist_f = [f(param,x)-param.fopt]
    hist_x0 = [x[0]];hist_x1=[x[1]]

    h = h_0
    for _ in range(maxitr):
        res = f(param,x) - param.fopt
        if res < tol:
            break
        k += 1

        # wDG (Itoh--Abe): sequential sweep, later coordinates already see
        # the updated earlier ones
        for j in range(len(x)):
            temp = optimize.root_scalar(equ_IA_m0,bracket=[-20,20],args = (param,h,j,x))
            x[j] = temp.root

        # output
        if k % k_skip == 0:
            res = f(param,x) - param.fopt
            hist_k.append(k)
            hist_f.append(res)
            hist_x0.append(x[0])
            hist_x1.append(x[1])

    return k,hist_k,hist_f,hist_x0,hist_x1

# Itoh--Abe equation of the accelerated scheme for mode0
def equ_IAacc_m0(y_i,param,h,i,x,v,z,beta1,gamma1,bg):
    """Residual of the accelerated Itoh--Abe equation, two-variable quadratic.

    ``y_i`` is the candidate new value of coordinate ``i`` (0 or 1); the
    residual combines difference quotients built from ``x``, ``v`` and
    ``h``, the weighted term ``beta1*z_i + gamma1*y_i`` and a quadratic
    gradient-like term scaled by ``0.5/bg``.  Returns ``None`` for any
    other ``i``.
    """
    if i == 0:
        partner = 1
    elif i == 1:
        partner = 0
    else:
        return None

    rate = (y_i-x[i])/h
    # diagonal part evaluated between y_i and z_i, coupling through A[0][1]
    quad = 0.5*param.A[i][i]*(y_i+z[i]) + param.A[0][1]*x[partner] + param.b[i]
    return (rate + y_i -v[i])/h - (beta1*z[i]+gamma1*y_i) + rate + y_i + 0.5/bg*quad

# Itoh--Abe for accelerated gradient flow
def wDG_IAacc(param,h_0,beta1,gamma1,bg,maxitr):
    """Coordinate-wise Itoh--Abe iteration for the accelerated gradient flow.

    Per iteration: ``z`` is formed from ``x`` and ``v``, every coordinate of
    ``x`` is replaced by the root of ``equ_IAacc_m0`` found by
    ``optimize.root_scalar`` inside ``[-20, 20]``, and finally
    ``v = (x - x_old)/h + x``.

    Parameters
    ----------
    param : problem definition (``init``, ``fopt`` and whatever
        ``equ_IAacc_m0`` reads).
    h_0 : step size.
    beta1, gamma1, bg : scheme coefficients forwarded to ``equ_IAacc_m0``.
    maxitr : maximum number of iterations.

    Returns
    -------
    ``(k, hist_k, hist_f, hist_x0, hist_x1)``: iterations performed and the
    recorded iteration numbers, objective gaps and first two coordinates
    (sampled every ``k_skip`` iterations, using the module-level
    ``k_skip`` like the other solvers).
    """
    # Float copies: x is overwritten coordinate by coordinate below, so it
    # must share memory neither with param.init nor with v.
    x = np.array(param.init, dtype=float)
    v = x.copy()
    k = 0   # iteration
    tol = 1e-12  # tolerance
    hist_k = [0]
    hist_f = [f(param,x)-param.fopt]
    hist_x0 = [x[0]];hist_x1=[x[1]]

    h = h_0
    for _ in range(maxitr):
        res = f(param,x) - param.fopt
        if res < tol:
            break
        k += 1

        # wDG (Itoh--Abe)
        # x_b must be a snapshot; a slice view would follow the in-place
        # updates and make (x - x_b) identically zero
        x_b = x.copy()
        z = ((1+h)*x + h*v) / (1+2*h)
        for j in range(len(x)):
            temp = optimize.root_scalar(equ_IAacc_m0,bracket=[-20,20],args = (param,h,j,x,v,z,beta1,gamma1,bg))
            x[j] = temp.root
        v = (x - x_b)/h + x

        # output
        if k % k_skip == 0:
            res = f(param,x) - param.fopt
            hist_k.append(k)
            hist_f.append(res)
            hist_x0.append(x[0])
            hist_x1.append(x[1])

    return k,hist_k,hist_f,hist_x0,hist_x1

# convex functions

# Problem selection for the convex experiment: exactly one "mode = ..." line
# should be active; the others are kept as ready-made alternatives.
# mode = 0; A = [np.array([[0.101,0.1-0.001],[0.1-0.001,0.101]]), np.array([0.01,0.02])]; init = np.array([2.,3.])    # matrix for defining quadratic function
# mode = 1; A = [np.diag([1,0.5]), np.array([1,0])]; init = np.array([10,15])   # [A,b] for ||Ax||^2 + 3sin^2(bx)
# mode = 2; A = [1, 1001, 0.01]; init = 0   #Cahn-Hilliard A = [length, the number of grids, diffusion coefficient q] init 0: sin(2*pi*x/L) - 1 + 2*x/L
# mode = 3; A = 1000 ;init = 0  #Hilbert matrix A = dimension
# mode = 4; A = [10, 1000]; init = 0   #Nesterov's lower bound A = [L,dimension]
# mode = 5; A = np.diag([0.05,1,3]); init = np.array([3,5,3])  #sqrt(1+x**2)
# mode = 6; A = [50,100]; init = 0  #logsumexp A = [dimension, the number of terms]
mode = 7; A = [np.array([0.1, 0.001])]; init = np.array([2.,4.])

# clr is read as a global by plt_res
cm = plt.get_cmap("Spectral")
clr = cm(0.1) # color for plotting

# Left panel (ax1): trajectory in the plane of the first two coordinates.
# Right panel (ax2): objective gap per iteration on a logarithmic y axis.
fig=plt.figure(figsize=(12.8,4.8))
ax1=fig.add_subplot(1,2,1,xlabel=r'$x$', ylabel=r'$y$')
ax2=fig.add_subplot(1,2,2,xlabel=r'$iteration$', ylabel=r'$f(x)$')
# ax1.set_xscale('log')
# ax1.set_yscale('log')
# ax2.set_xscale('log')
ax2.set_yscale('log')

# r scales the step size; rstr is the suffix of the output file names
r = 1
rstr=str(r)+'f100'

param = Param(mode,A,init)  # initialization
# step size h = r / sqrt(L), with L taken from the problem definition
h = r/np.sqrt(param.L)
# h = 2
# Run NAG (convex variant) and plot its trajectory and objective gap.
k,hist_k,hist_f,hist_x0,hist_x1 = NAG_c(param,h_0 = h,maxitr = 10000)
ax1.plot(hist_x0,hist_x1,label = "NAG_c")
ax1.legend()
# the legend entry names the method that was actually run
ax2.plot(hist_k,hist_f, marker='.',linestyle = "None", label = "NAG_c")
ax2.legend()
# Write a thinned history (iteration, gap, x0, x1): every sample for the
# first 80, every 50th below 2000, every 200th afterwards.  The with-block
# closes the file even if a write fails.
with open('NAGctraj'+rstr+'.dat', 'w') as datafile: # output
    for i, row in enumerate(zip(hist_k,hist_f,hist_x0,hist_x1)):
        if i < 80 or (i < 2000 and i%50 == 0) or i%200 == 0:
            datafile.write(' '.join(map(str,row))+'\n')

# Run the convex weak discrete gradient scheme from a fresh problem
# instance with the same step size rule, and add it to both panels.
param = Param(mode,A,init)  # initialization
h = r/np.sqrt(param.L)
k,hist_k,hist_f,hist_x0,hist_x1 = wDG_c(param,h_0 = h,maxitr = 10000)
ax1.plot(hist_x0,hist_x1,label = "wDG_c")
ax1.legend()
ax2.plot(hist_k,hist_f, marker='.',linestyle = "None", label = "wDG_c")
ax2.legend()
# Write a thinned history (iteration, gap, x0, x1): every sample for the
# first 80, every 50th below 2000, every 200th afterwards.  The with-block
# closes the file even if a write fails.
with open('wDGctraj'+rstr+'.dat', 'w') as datafile: # output
    for i, row in enumerate(zip(hist_k,hist_f,hist_x0,hist_x1)):
        if i < 80 or (i < 2000 and i%50 == 0) or i%200 == 0:
            datafile.write(' '.join(map(str,row))+'\n')

# param = Param(mode,A,init)  # initialization
# # h = param.mu/(param.L - param.mu)
# k,hist_k,hist_f,hist_x0,hist_x1 = wDG_sc_noy(param,h_0 = h,maxitr = 10000)
# ax1.plot(hist_x0,hist_x1,label = "wDG_sc_noy")
# ax1.legend()
# ax2.plot(hist_k,hist_f, marker='.',linestyle = "None", label = "wDG_sc_noy")
# ax2.legend()
# datafile = open('wDGscnoytraj'+rstr+'.dat', 'w') # output
# for i in range(len(hist_k)):
#     datafile.write(str(hist_k[i])+' '+str(hist_f[i])+' '+str(hist_x0[i])+' '+str(hist_x1[i])+'\n')
# datafile.close()

# param = Param(mode,A,init)  # initialization
# # h = param.mu/(param.L - param.mu)
# h = 1.4*np.sqrt(param.mu)*h
# k,hist_k,hist_f,hist_x0,hist_x1 = pRK(param,h_0 = h,maxitr = 10000)
# ax1.plot(hist_x0,hist_x1,label = "pRK")
# ax1.legend()
# ax2.plot(hist_k,hist_f, marker='.',linestyle = "None", label = "pRK")
# ax2.legend()
# datafile = open('pRKtraj'+rstr+'.dat', 'w') # output
# for i in range(len(hist_k)):
#     datafile.write(str(hist_k[i])+' '+str(hist_f[i])+' '+str(hist_x0[i])+' '+str(hist_x1[i])+'\n')
# datafile.close()

# param = Param(mode,A,init)  # initialization
# # h = param.mu/(param.L - param.mu)
# k,hist_k,hist_f,hist_x0,hist_x1 = pRK2(param,h_0 = h,maxitr = 10000)
# ax1.plot(hist_x0,hist_x1,label = "pRK2")
# ax1.legend()
# ax2.plot(hist_k,hist_f, marker='.',linestyle = "None", label = "pRK2")
# ax2.legend()
# datafile = open('pRK2traj'+rstr+'.dat', 'w') #output
# for i in range(len(hist_k)):
#     datafile.write(str(hist_k[i])+' '+str(hist_f[i])+' '+str(hist_x0[i])+' '+str(hist_x1[i])+'\n')
# datafile.close()

# param2 = Param(mode,A,init)  # initialization
# h1 = 100/param2.L/np.sqrt(2)
# k,hist_k,hist_f,hist_x0,hist_x1 = wDG_IA(param2,h_0 = h1,maxitr = 1000)
# ax1.plot(hist_x0,hist_x1,label = "wDG_IA")
# ax1.legend()
# ax2.plot(hist_k,hist_f, marker='.',linestyle = "None", label = "wDG_IA")
# ax2.legend()

# param = Param(mode,A,np.array([2.,3.]))  # initialization
# alpha = np.sqrt(2)*param.L**2/param.mu - param.mu/4
# beta = param.mu/2
# gamma = -param.mu/4
# alpha = np.sqrt(2)*param.L**2/param.mu
# beta = param.mu/4
# gamma = 0
# bg = beta+gamma
# beta1 = beta/(beta+gamma)
# gamma1 = gamma/(beta+gamma)
# h = 1000*np.sqrt(bg)/(np.sqrt(alpha+gamma) - np.sqrt(bg))
# # h=0.1*h*np.sqrt(param.mu)
# k,hist_k,hist_f,hist_x0,hist_x1 = wDG_IAacc(param,h_0 = h,beta1=beta1,gamma1=gamma1,bg=bg,maxitr = 10000)
# # print(k)
# ax1.plot(hist_x0,hist_x1,label = "wDG_accIA")
# ax1.legend()
# ax2.plot(hist_k,hist_f, marker='.',linestyle = "None", label = "wDG_accIA")
# ax2.legend()

# plt.savefig('conv.pdf',bbox_inches='tight')
# Display the figure of the convex experiment.
plt.show()

# strongly convex functions

# Problem selection for the strongly convex experiment: exactly one
# "mode = ..." line should be active; the others are ready-made alternatives.
mode = 0; A = [np.array([[0.101,0.1-0.001],[0.1-0.001,0.101]]), np.array([0.01,0.02])]; init = np.array([2.,3.])    # matrix for defining quadratic function
# mode = 1; A = [np.diag([1,0.5]), np.array([1,0])]; init = np.array([10,15])   # [A,b] for ||Ax||^2 + 3sin^2(bx)
# mode = 2; A = [1, 1001, 0.01]; init = 0   # Cahn-Hilliard A = [length, the number of grids, diffusion coefficient q] init 0: sin(2*pi*x/L) - 1 + 2*x/L
# mode = 3; A = 1000 ;init = 0  #Hilbert matrix A = dimension
# mode = 4; A = [10, 1000]; init = 0   #Nesterov's lower bound A = [L, dimension]
# mode = 5; A = np.diag([0.05,1,3]); init = np.array([3,5,3])  #sqrt(1+x**2)
# mode = 6; A = [50,100]; init = 0  #logsumexp A = [dimension, the number of terms]
# mode = 7; A = [np.array([0.001, 0.1])]; init = np.array([2.,3.])

# clr is read as a global by plt_res
cm = plt.get_cmap("Spectral")
clr = cm(0.1) # color for plotting

# Left panel (ax1): trajectory in the plane of the first two coordinates.
# Right panel (ax2): objective gap per iteration on a logarithmic y axis.
fig=plt.figure(figsize=(12.8,4.8))
ax1=fig.add_subplot(1,2,1,xlabel=r'$x$', ylabel=r'$y$')
ax2=fig.add_subplot(1,2,2,xlabel=r'$iteration$', ylabel=r'$f(x)$')
# ax1.set_xscale('log')
# ax1.set_yscale('log')
# ax2.set_xscale('log')
ax2.set_yscale('log')

# r scales the step size; rstr is the suffix of the output file names
r = 1
# NOTE: this value is overwritten by the assignment on the next line
rstr='pRK2opt'
rstr=str(r)

param = Param(mode,A,init)  # initialization
# step size h = r / sqrt(L), with L the largest eigenvalue from Param
h = r/np.sqrt(param.L)
# h = 2
# Run NAG (strongly convex variant) and plot trajectory and objective gap.
k,hist_k,hist_f,hist_x0,hist_x1 = NAG_sc(param,h_0 = h,maxitr = 10000)
ax1.plot(hist_x0,hist_x1,label = "NAG_sc")
ax1.legend()
ax2.plot(hist_k,hist_f, marker='.',linestyle = "None", label = "NAG_sc")
ax2.legend()
# Write the full history, one "iteration gap x0 x1" line per sample; the
# with-block closes the file even if a write fails.
with open('NAGsctraj'+rstr+'.dat', 'w') as datafile: # output
    for row in zip(hist_k,hist_f,hist_x0,hist_x1):
        datafile.write(' '.join(map(str,row))+'\n')

# param = Param(mode,A,init)  # initialization
# h = h*np.sqrt(param.mu)
# Run the strongly convex weak discrete gradient scheme on the same problem
# instance, with step size sqrt(mu) / (sqrt(L) - sqrt(mu)).
h = np.sqrt(param.mu)/(np.sqrt(param.L) - np.sqrt(param.mu))
k,hist_k,hist_f,hist_x0,hist_x1 = wDG_sc(param,h_0 = h,maxitr = 10000)
ax1.plot(hist_x0,hist_x1,label = "wDG_sc")
ax1.legend()
ax2.plot(hist_k,hist_f, marker='.',linestyle = "None", label = "wDG_sc")
ax2.legend()
# Write the full history, one "iteration gap x0 x1" line per sample; the
# with-block closes the file even if a write fails.
with open('wDGsctraj'+rstr+'.dat', 'w') as datafile: # output
    for row in zip(hist_k,hist_f,hist_x0,hist_x1):
        datafile.write(' '.join(map(str,row))+'\n')

# Run the variant without the intermediate point from a fresh problem
# instance; the step size h set earlier in the script is reused unchanged.
param = Param(mode,A,init)  # initialization
# h = param.mu/(param.L - param.mu)
k,hist_k,hist_f,hist_x0,hist_x1 = wDG_sc_noy(param,h_0 = h,maxitr = 10000)
ax1.plot(hist_x0,hist_x1,label = "wDG_sc_noy")
ax1.legend()
ax2.plot(hist_k,hist_f, marker='.',linestyle = "None", label = "wDG_sc_noy")
ax2.legend()
# Write the full history, one "iteration gap x0 x1" line per sample; the
# with-block closes the file even if a write fails.
with open('wDGscnoytraj'+rstr+'.dat', 'w') as datafile: # output
    for row in zip(hist_k,hist_f,hist_x0,hist_x1):
        datafile.write(' '.join(map(str,row))+'\n')

# param = Param(mode,A,init)  # initialization
# # h = 4*param.mu/param.L
# # h = 1.4*np.sqrt(param.mu)*h
# k,hist_k,hist_f,hist_x0,hist_x1 = pRK(param,h_0 = h,maxitr = 10000)
# ax1.plot(hist_x0,hist_x1,label = "pRK")
# ax1.legend()
# ax2.plot(hist_k,hist_f, marker='.',linestyle = "None", label = "pRK")
# ax2.legend()
# datafile = open('pRKtraj'+rstr+'.dat', 'w') # output
# for i in range(len(hist_k)):
#     datafile.write(str(hist_k[i])+' '+str(hist_f[i])+' '+str(hist_x0[i])+' '+str(hist_x1[i])+'\n')
# datafile.close()

# param = Param(mode,A,init)  # initialization
# # h = param.mu/(param.L - param.mu)
# k,hist_k,hist_f,hist_x0,hist_x1 = pRK2(param,h_0 = h,maxitr = 10000)
# ax1.plot(hist_x0,hist_x1,label = "pRK2")
# ax1.legend()
# ax2.plot(hist_k,hist_f, marker='.',linestyle = "None", label = "pRK2")
# ax2.legend()
# datafile = open('pRK2traj'+rstr+'.dat', 'w') # output
# for i in range(len(hist_k)):
#     datafile.write(str(hist_k[i])+' '+str(hist_f[i])+' '+str(hist_x0[i])+' '+str(hist_x1[i])+'\n')
# datafile.close()

# param2 = Param(mode,A,init)  # initialization
# h1 = 100/param2.L/np.sqrt(2)
# k,hist_k,hist_f,hist_x0,hist_x1 = wDG_IA(param2,h_0 = h1,maxitr = 1000)
# ax1.plot(hist_x0,hist_x1,label = "wDG_IA")
# ax1.legend()
# ax2.plot(hist_k,hist_f, marker='.',linestyle = "None", label = "wDG_IA")
# ax2.legend()

# param = Param(mode,A,np.array([2.,3.]))  # initialization
# alpha = np.sqrt(2)*param.L**2/param.mu - param.mu/4
# beta = param.mu/2
# gamma = -param.mu/4
# alpha = np.sqrt(2)*param.L**2/param.mu
# beta = param.mu/4
# gamma = 0
# bg = beta+gamma
# beta1 = beta/(beta+gamma)
# gamma1 = gamma/(beta+gamma)
# h = 1000*np.sqrt(bg)/(np.sqrt(alpha+gamma) - np.sqrt(bg))
# # h=0.1*h*np.sqrt(param.mu)
# k,hist_k,hist_f,hist_x0,hist_x1 = wDG_IAacc(param,h_0 = h,beta1=beta1,gamma1=gamma1,bg=bg,maxitr = 10000)
# # print(k)
# ax1.plot(hist_x0,hist_x1,label = "wDG_accIA")
# ax1.legend()
# ax2.plot(hist_k,hist_f, marker='.',linestyle = "None", label = "wDG_accIA")
# ax2.legend()

# plt.savefig('conv.pdf',bbox_inches='tight')
# Display the figure of the strongly convex experiment.
plt.show()