using Convex
using SCS
using LinearAlgebra
using Random
#using Flux
using Zygote
using BenchmarkTools
using JuMP
using MathOptInterface
using Clarabel

# Solver factory passed to every `solve!` call in this script: Clarabel with its
# "verbose" attribute set to 0. Declared `const` so functions that read this global
# see a fixed binding instead of an untyped, reassignable one.
const optimizer = Convex.MOI.OptimizerWithAttributes(
    Clarabel.Optimizer, "verbose" => 0
)
# Set random seed for reproducibility
Random.seed!(0)

# Constants
# Declared `const` so that functions reading them get fixed, concretely typed
# bindings rather than untyped globals.
# NOTE(review): only MAX_OUTER_EPOCHS is referenced by the code visible in this file;
# the remaining values are kept as-is in case other code relies on them.
const epsilon = 1e-4
const beta = 0.1
const eta_s = 0.1
const eta_l = 1.1
const slope_epsilon = 1e-1
const MAX_INNER_EPOCHS = 10^4
const MAX_OUTER_EPOCHS = 10^5  # Upper bound on outer-loop iterations in `run`

# Objective function
"""
    f(x)

Return the linear objective
`x[1] + (1/2)*x[2] + (1/2)*x[3] + (1/3)*x[4] + (1/4)*x[5] + (1/3)*x[6]`.

Only the first six entries of `x` are read. The terms are accumulated left to right.
"""
function f(x)
    total = x[1]
    total += (1 / 2) * x[2]
    total += (1 / 2) * x[3]
    total += (1 / 3) * x[4]
    total += (1 / 4) * x[5]
    total += (1 / 3) * x[6]
    return total
end

# Constraint functions for Convex.jl
"""
    g_u_cvx(x_var, t, j)

Constraint expression for row `j` of `t`, used when building Convex.jl constraints.

Forwards to [`g_u`](@ref) so that the solver-side constraint and the barrier-side
constraint share one formula instead of two hand-copied ones. `g_u` applies `exp`
only to entries of `t` and combines the entries of its first argument with `+`, `-`
and multiplication by entries of `t`, so it accepts an optimization variable in
place of a numeric vector.

# Arguments
- `x_var`: six-element coefficient container (numeric vector or Convex.jl variable).
- `t`: matrix whose row `j` holds the point `(t[j, 1], t[j, 2])`.
- `j`: row index into `t`.
"""
g_u_cvx(x_var, t, j) = g_u(x_var, t, j)

# Inner minimization using Convex.jl
"""
    get_x_watson(u)

Solve the inner minimization for the sample points `u` with Convex.jl and return the
resulting six coefficients as a vector.

The problem minimizes `f` over a six-element variable subject to one constraint per
row of `u`: `g_u_cvx(x, u, j) <= -1e-5`.

# Arguments
- `u`: matrix whose rows are read as `(u[j, 1], u[j, 2])` by `g_u_cvx`.

# Returns
`vec(evaluate(x_var))` after the solve.
"""
function get_x_watson(u)
    x_var = Variable(6)
    # Reuse `f` so the solver objective and the objective used by `barrier` are
    # written in exactly one place.
    objective = f(x_var)
    # Each constraint must hold with this small margin rather than with equality.
    feasibility_margin = 1e-5
    constraints = [g_u_cvx(x_var, u, j) <= -feasibility_margin for j in 1:size(u, 1)]
    problem = minimize(objective, constraints)
    solve!(problem, optimizer)

    return vec(evaluate(x_var))
end
# Tell Zygote not to differentiate through the solver call.
Zygote.@nograd get_x_watson

# Constraint functions for barrier method
"""
    g_u(x, t, j)

Evaluate the constraint function at row `j` of `t`.

Returns `exp(t1^2 + t2^2)` minus the quadratic
`x[1] + x[2]*t1 + x[3]*t2 + x[4]*t1^2 + x[5]*t1*t2 + x[6]*t2^2`,
where `t1 = t[j, 1]` and `t2 = t[j, 2]`.
"""
function g_u(x, t, j)
    target = exp(t[j, 1]^2 + t[j, 2]^2)
    quadratic = (
        x[1] +
        x[2] * t[j, 1] +
        x[3] * t[j, 2] +
        x[4] * t[j, 1]^2 +
        x[5] * t[j, 1] * t[j, 2] +
        x[6] * t[j, 2]^2
    )
    return target - quadratic
end

"""
    safe_log(x)

Return `log` of `x` after raising `x` to at least `1e-10`, so that zero or negative
inputs yield `log(1e-10)` instead of `-Inf` or a domain error.
"""
function safe_log(x)
    lower_bound = 1e-10
    clamped = max(x, lower_bound)
    return log(clamped)
end

"""
    barrier(x, t, alpha)

Log-barrier value `alpha * f(x) - sum(safe_log(-g_u(x, t, j)))` taken over every row
`j` of `t`.

# Arguments
- `x`: coefficient vector passed to `f` and `g_u`.
- `t`: matrix of sample points, one per row.
- `alpha`: weight applied to the objective term.
"""
function barrier(x, t, alpha)
    # Negated constraint values; positive when the constraint holds strictly.
    slacks = [-g_u(x, t, row) for row in 1:size(t, 1)]
    log_penalty = sum(safe_log.(slacks))
    weighted_objective = alpha * f(x)
    return weighted_objective - log_penalty
end

# Inner minimization function
"""
    inner_min(u)

Solve the inner problem for the sample points `u`, print the objective value and the
solution, and return the solution vector.

`get_x_watson` receives a copy of `u`, so the caller's array is never handed to the
solver layer directly.
"""
function inner_min(u)
    x = get_x_watson(copy(u))

    f_val_xmin = f(x)
    println("f(y_min) = ", f_val_xmin)
    println("x = ", x)
    return x
end

# Main optimization loop
"""
    run()

Outer optimization loop over the sample points `u`.

Starting from a random `20 x 2` matrix `u`, each epoch:

1. solves the inner problem for the current `u` via `inner_min`;
2. differentiates `barrier(x, u, alpha)` with respect to `u` using Zygote;
3. moves `u` along that gradient, scaled by `lr_u / alpha`;
4. clamps every entry of `u` to `[0, 1]`.

The loop stops once `u` changes by less than `1e-2` in norm between consecutive
epochs (printing the final `u` and gradient), or after `MAX_OUTER_EPOCHS` epochs.

Returns `nothing`.
"""
function run()
    u = rand(20, 2)  # Random initial u
    alpha = 1e5
    lr_u = 1e-1  # Outer learning rate; constant across epochs, so set once here
    clip_valueL = 0.0
    clip_valueU = 1.0
    u_prev = copy(u)

    for _ in 1:MAX_OUTER_EPOCHS
        x = inner_min(u)
        # `x` is held fixed; only the sample points are differentiated.
        grad_u = gradient(points -> barrier(x, points, alpha), u)[1]

        # Step along the gradient (fully fused, in place), then clamp to the box.
        @. u += lr_u * grad_u / alpha
        clamp!(u, clip_valueL, clip_valueU)

        # `norm` comes from LinearAlgebra; for a matrix it is the Frobenius norm.
        if norm(u .- u_prev) < 1e-2
            println("Final u: ", u, "Grad u: ", grad_u)
            break
        end
        u_prev .= u
    end
    return nothing
end

# Execute the optimization
# Benchmark the whole optimization with BenchmarkTools and keep the result in `t0`.
# NOTE(review): `@benchmark` may invoke `run()` more than once, and `run()` draws a
# fresh random `u` and prints on every epoch — confirm that is the intended workload.
t0 = @benchmark run()
# Print the collected benchmark result.
println(t0)
