using Lasso, Distributions, LinearAlgebra, Random
using DataFrames, CSV, Statistics

# Restrict BLAS to a single thread for the whole run.
BLAS.set_num_threads(1)

# Read the raw data file; the 55 columns are named "1" through "55".
covtype = CSV.read("covtype.data", DataFrame, header = string.(1:55))

# Recode column "55" in place as a 0/1 indicator: 1 where the raw value is 4,
# 0 everywhere else.
map!(v -> v == 4 ? 1 : 0, covtype."55", covtype."55")

# Feature groups. Columns 14 and 54 are deliberately not selected
# (NOTE(review): presumably reference levels of dummy-coded groups - confirm).
X1 = Matrix(covtype[:, 1:10])
X2 = Matrix(covtype[:, 11:13])
X3 = Matrix(covtype[:, 15:53])

# Shift each of the first ten columns so its minimum is zero, then apply
# log(1 + x).
colmin = minimum(X1, dims = 1)
X1 = log.((X1 .- colmin) .+ 1)

# Full design matrix and response; `y1` is the Bool version of the 0/1 label.
X = hcat(X1, X2, X3)
y = covtype[:, 55]
y1 = Vector{Bool}(y)

# Drop the reference to the data frame; everything needed is in X, y and y1.
covtype = nothing


include("../main/functions.jl")


# Scenario selector, read from the `case` environment variable (defaults to 1).
# A non-integer value makes `parse` throw.
case = parse(Int, get(ENV, "case", "1"))

# Version tag that becomes part of the result file names.
ver = "1"

# Map the scenario number to the criterion code: 1 => "P", 2 => "A", 3 => "L".
# Any other value is rejected here so the script stops with a clear message
# instead of hitting an undefined `crtn` further down.
if case == 1
    crtn = "P"
elseif case == 2
    crtn = "A"
elseif case == 3
    crtn = "L"
else
    throw(ArgumentError("unsupported case = $case; expected 1, 2 or 3"))
end


# Experiment constants.
Np = 1_000                                   # passed to PilotEst2 as its third argument
S = 500                                      # replicates run for each (gamma, rho) pair
rhos = [0.003, 0.005, 0.007, 0.01, 0.015]    # rho values looped over
lthr = 0.001                                 # passed to SubsampleEst2 after rho
gammas = [1]                                 # gamma values looped over
k = size(X)[2]                               # number of columns of the design matrix

# Seed the global RNG once before all replicates.
Random.seed!(2)
name = string("covtype-", ver, "-", crtn)

# Loop-invariant values: the criterion string handed to both estimators and
# the output directory (created up front so a bad path fails before any
# replicate is run).
crtn_opt = string(crtn, "-opt")
outdir = "results/covtype"
mkpath(outdir)

# For every (gamma, rho) pair run S replicates of pilot + subsample estimation,
# collect `est.auc` from each, and write the S values to one CSV file.
for gamma in gammas, rho in rhos
    aucs = zeros(S)
    @time for i in 1:S
        try
            plt = PilotEst2(X, y1, Np, criterion = crtn_opt,
                            standardize = true)
            est = SubsampleEst2(X, y1, plt, gamma, rho, lthr,
                                nlambda = 100, eps = 0.0001,
                                lambda_max = 5,
                                method = "bic", criterion = crtn_opt,
                                standardize = false)
            aucs[i] = est.auc
        catch e
            # Let Ctrl-C abort the run instead of being recorded as a failed
            # replicate.
            e isa InterruptException && rethrow()
            # Best effort: a failed replicate is stored as NaN, but the reason
            # is logged so failures can be diagnosed afterwards.
            @warn "replicate failed; recording NaN" i rho gamma reason = sprint(showerror, e)
            aucs[i] = NaN
        end
    end
    # With a single gamma the file name is unchanged; with several gammas a
    # suffix keeps the runs from overwriting each other.
    tag = length(gammas) == 1 ? string(rho) : string(rho, "-gamma", gamma)
    aucdf = DataFrame(aucdf = aucs)
    CSV.write(string(outdir, "/Subsampling-auc-", name, "-", tag, ".csv"), aucdf)
end
