
# function SI_NNLS_restart(C::SparseMatrixCSC, x0::Vector{Float64}, C_x0::Vector{Float64}, K::Int64, freq::Int64, γ::Float64, , labels::Vector{Float64})
"""
    SI_NNLS_restart_densev2(C, b, C_b, x0, C_x0, blocksize, K, total_time, num_restart,
                            freq, restart_ratio, file_path, ϵ)

Run `SI_NNLS_densev2` for at most `num_restart` rounds, feeding the iterate, metric,
iteration count and elapsed time returned by one round into the next, and finally
write everything logged in the `Results` object to `file_path` with
`exportresultstoCSV`.

The column blocks and their step sizes are computed once up front with
`compute_blocks_rows_slice_dense` and `compute_Lips_dense`. The restart loop stops
early as soon as the accumulated time reported by the solver reaches `total_time` or
the metric it returns is below `ϵ`.

Returns whatever `exportresultstoCSV(results, file_path)` returns.
"""
function SI_NNLS_restart_densev2(C, b::Vector{Float64}, C_b::Vector{Float64}, x0::Vector{Float64}, C_x0::Vector{Float64}, blocksize::Int64, K::Int64, total_time::Float64, num_restart::Int64, freq::Int64, restart_ratio::Float64, file_path::String, ϵ)

    results = Results()

    # State threaded through successive solver rounds.
    metric = first_order_opt(C, b, x0, C_x0, C_b)
    iters_done = 0
    elapsed = 0.0

    # One-off preprocessing shared by every round.
    blocks, sliced_Cs = compute_blocks_rows_slice_dense(C, blocksize)
    ηs = compute_Lips_dense(sliced_Cs, blocksize)

    # Original author's note: only block size = 1 is considered here.
    x, C_x = x0, C_x0
    for i in 1:num_restart
        x, C_x, metric, iters_done, elapsed = SI_NNLS_densev2(
            C, b, C_b, x, C_x, ηs, blocks, sliced_Cs, K, total_time, freq,
            metric, results, iters_done, elapsed, restart_ratio, ϵ,
        )
        @info "restart epoch: $i"

        out_of_time = elapsed >= total_time
        converged = metric < ϵ
        (out_of_time || converged) && break
    end

    return exportresultstoCSV(results, file_path)
end

# nonsparse version
# function SI_NNLS(C::SparseMatrixCSC, x0::Vector{Float64}, C_x0::Vector{Float64}, K::Int64, freq::Int64,  init_metric::Float64, γ::Float64, blocks::Array{UnitRange{Int}}, row_idxs::Array{Vector{Int}}, extra_term::Float64, ηs, ubs, sliced_Cs)
"""
    SI_NNLS_densev2(C, b, C_b, x0, C_x0, ηs, blocks, sliced_Cs, K, total_time, freq,
                    init_metric, results, init_epoch, init_time, restart_ratio, ϵ)

Run one round (between two restarts) of the randomized block-coordinate iteration,
anchored at `x0`, and return the averaged iterate at the first evaluation point where
a stopping rule fires.

Each iteration draws a block number uniformly at random, accumulates a weighted
block-gradient estimate in `p`, and sets the block to
`max.(0, x0[j] - ηs[jk] * p[j])`. Every `freq * length(blocks)` iterations, and always
on the final iteration, the averaged point `x̃ = x + r / A` is formed, its metric
(`first_order_opt`) and objective `0.5 * norm(C * x̃ - b)^2` are logged through
`logresult!`, and the function returns if any of the following holds:

- `metric <= restart_ratio * init_metric`
- accumulated time exceeds `total_time`
- `metric < ϵ`
- all `K * length(blocks)` iterations have been used

# Arguments
- `C`: matrix of size `(m, n)`; used for `C * x̃` at evaluation points.
- `b`: right-hand side used in the objective and passed to `first_order_opt`.
- `C_b`: length-`n` vector, indexed by block and subtracted from the block product.
  Presumably `C' * b` (TODO confirm against the caller).
- `x0`: starting point; also the anchor of every block update. Not modified.
- `C_x0`: initial value of the running product vector. Presumably `C * x0`
  (TODO confirm). Not modified; a copy is updated internally.
- `ηs`: per-block step sizes, indexed by block number.
- `blocks`, `sliced_Cs`: `blocks[i]` is the index set of block `i` into `x`, and
  `sliced_Cs[i]` is the matrix multiplied with that block (presumably the matching
  columns of `C`; TODO confirm).
- `K`: number of epochs; the loop runs `K * length(blocks)` block updates.
- `total_time`: time budget compared against `init_time` plus the time spent here.
- `freq`: evaluate every `freq` epochs.
- `init_metric`, `restart_ratio`: define the restart threshold.
- `results`: log container handed to `logresult!`.
- `init_epoch`, `init_time`: offsets added to the iteration counter and elapsed time
  in the log and in the returned values.
- `ϵ`: metric tolerance.

# Returns
`(x̃, C * x̃, metric, k + init_epoch, elapsed + init_time)`. When `K <= 0` no work is
done and `(x0, C_x0, init_metric, init_epoch, init_time)` is returned.

# Throws
- `ArgumentError` if there are fewer than two blocks, because the step-size recursion
  divides by `length(blocks) - 1`.
"""
function SI_NNLS_densev2(C, b::Vector{Float64}, C_b::Vector{Float64}, x0::Vector{Float64}, C_x0::Vector{Float64}, ηs, blocks, sliced_Cs, K::Int64, total_time::Float64, freq::Int64, init_metric::Float64, results::Results, init_epoch::Int64, init_time::Float64, restart_ratio, ϵ)

    t0 = time()
    m, n = size(C)
    num_blks = length(blocks)
    # a = 1 / (num_blks - 1) below is Inf for a single block and poisons every iterate.
    num_blks > 1 || throw(ArgumentError("SI_NNLS_densev2 requires at least 2 blocks, got $num_blks"))
    @info "num_blks: $num_blks"

    # Nothing to iterate: hand the warm start back instead of falling through to `nothing`.
    K > 0 || return x0, C_x0, init_metric, init_epoch, init_time

    total_iters = K * num_blks
    eval_period = freq * num_blks

    # Step-size sequence: (prev_a, prev_A) is the previous step, (a, A) the current one
    # and (later_a, later_A) the next one.
    prev_a, prev_A = 0.0, 0.0
    a = 1.0 / (num_blks - 1)
    A = a
    later_a = 1 / ((num_blks - 1) * (num_blks - 1))
    later_A = A + later_a

    p = zeros(n)      # accumulated weighted block gradients
    r = zeros(n)      # correction turning x into the averaged iterate x̃
    s = zeros(m)      # same correction, mapped through the block matrices
    t = zeros(m)      # change of q caused by the most recent block update
    q = copy(C_x0)    # running product; copied so the caller's C_x0 is left untouched
    x = copy(x0)

    idx_seq = 1:num_blks
    for k in 1:total_iters
        jk = rand(idx_seq)
        j = blocks[jk]
        Cj = sliced_Cs[jk]

        # The extrapolated point depends on how many previous steps exist.
        if k == 1
            product = (q' * Cj)[:]
        elseif k == 2
            product = ((q + prev_a / a * t)' * Cj)[:]
        else
            ratio = prev_a^2 / (a * (prev_A - prev_a))
            product = ((q + (1 - ratio) / prev_A * s + (num_blks - 1) * ratio * t)' * Cj)[:]
        end

        p[j] = p[j] .+ num_blks * a * (product .- C_b[j])

        # Projected step, always taken from the anchor x0 rather than from x.
        prev_xj = x[j]
        x[j] = max.(0.0, x0[j] - ηs[jk] * p[j])
        Δxj = x[j] - prev_xj

        t[:] = Cj * Δxj
        q[:] = q + t
        if k >= 2
            weight = (num_blks - 1) * a - prev_A
            r[j] = r[j] + weight * Δxj
            s[:] = s + weight * t
        end

        # Advance the step-size sequence by one position.
        prev_a, prev_A = a, A
        a, A = later_a, later_A
        later_a = min(num_blks / (num_blks - 1) * later_a, sqrt(later_A) / (2 * num_blks))
        later_A = later_A + later_a

        # Evaluate periodically, and always on the last iteration so that the caller
        # receives a result even when K is not a multiple of freq.
        if k % eval_period == 0 || k == total_iters
            x̃ = x + 1.0 / prev_A * r
            C_x̃ = C * x̃
            metric = first_order_opt(C, b, x̃, C_x̃, C_b)
            func_value = 0.5 * norm(C_x̃ - b)^2
            td = time() - t0
            @info "k: $(k÷num_blks), time: $(td+init_time), metric: $metric,  func_value: $func_value"
            logresult!(results, k + init_epoch, td + init_time, metric, func_value)
            if metric <= restart_ratio * init_metric || td + init_time > total_time || metric < ϵ || k == total_iters
                return x̃, C_x̃, metric, k + init_epoch, td + init_time
            end
        end
    end

    # The final iteration always returns above; this keeps the return type concrete.
    error("SI_NNLS_densev2: iteration loop ended without returning a result")
end
