using Turing
using Distributions
using MCMCChains
using Plots
using StatsPlots
using Distances
using LinearAlgebra
using Random
using CSV
using DataFrames
using MLDataUtils: shuffleobs, splitobs, rescale!

function onehot!(df::AbstractDataFrame, col, cate = sort(unique(df[!, col])); outnames = Symbol.(:ohe_, cate))
    transform!(df, @. col => ByRow(isequal(cate)) .=> outnames)
end

function onehot(df::AbstractDataFrame, col, cate = sort(unique(df[!, col])); outnames = Symbol.(:ohe_, cate))
    transform(df, @. col => ByRow(isequal(cate)) .=> outnames)
end

Random.seed!(0)

data = DataFrame(CSV.File("Bayesian/insurance.csv"))

first(data, 6)

transform!(data, :region => (x -> x .== ["southwest" "southeast" "northwest" "northeast"]) => [:southwest, :southeast, :northwest, :northeast])
select!(data, Not([:region]))
data[!, :smoker] = [v == "yes" ? 1. : 0. for v in data[!, :smoker]]
data[!, :sex] = [v == "male" ? 1. : 0. for v in data[!, :sex]]
data = Float64.(data)

trainset, testset = splitobs(shuffleobs(data), 0.7)

target = :charges
train = Matrix(select(trainset, Not(target)))
test = Matrix(select(testset, Not(target)))
train_target = trainset[:, target]
test_target = testset[:, target]

# Standardise the features.
μ, σ = rescale!(train; obsdim = 1)
rescale!(test, μ, σ; obsdim = 1)

μtarget, σtarget = rescale!(train_target; obsdim = 1)
rescale!(test_target, μtarget, σtarget; obsdim = 1)

# Bayesian linear regression.
@model function linear_regression(x, y)

    σ₂ ~ truncated(Normal(0, 100), 0, Inf)

    # Set intercept prior.
    intercept ~ Normal(0, sqrt(3))

    # Set the priors on our coefficients.
    nfeatures = size(x, 2)
    coefficients ~ MvNormal(nfeatures, sqrt(10))

    # Calculate all the mu terms.
    mu = intercept .+ x * coefficients
    y ~ MvNormal(mu, sqrt(σ₂))
end

model = linear_regression(train, train_target)
chain = sample(model, NUTS(0.65), 3_000)

plot(chain)
stats = DataFrame.(describe(chain))
select!(stats[2], Not(:parameters))
stats = hcat(stats[1], stats[2])

# Import the GLM package.
using GLM

# Perform multiple regression OLS.
train_with_intercept = hcat(ones(size(train, 1)), train)
ols = lm(train_with_intercept, train_target)

# Compute predictions on the training data set
# and unstandardize them.
p = GLM.predict(ols)
train_prediction_ols = μtarget .+ σtarget .* p

# Compute predictions on the test data set
# and unstandardize them.
test_with_intercept = hcat(ones(size(test, 1)), test)
p = GLM.predict(ols, test_with_intercept)
test_prediction_ols = μtarget .+ σtarget .* p

# Make a prediction given an input vector.
function prediction(chain, x)
    p = get_params(chain[200:end, :, :])
    targets = p.intercept' .+ x * reduce(hcat, p.coefficients)'
    return vec(mean(targets; dims = 2))
end

# Calculate the predictions for the training and testing sets
# and unstandardize them.
p = prediction(chain, train)
train_prediction_bayes = μtarget .+ σtarget .* p
p = prediction(chain, test)
test_prediction_bayes = μtarget .+ σtarget .* p

# Show the predictions on the test data set.
DataFrame(
    Cost = testset[!, target],
    Bayes = test_prediction_bayes,
    OLS = test_prediction_ols
)

println(
    "Training set:",
    "\n\tBayes loss: ",
    msd(train_prediction_bayes, trainset[!, target]),
    "\n\tOLS loss: ",
    msd(train_prediction_ols, trainset[!, target])
)

println(
    "Test set:",
    "\n\tBayes loss: ",
    msd(test_prediction_bayes, testset[!, target]),
    "\n\tOLS loss: ",
    msd(test_prediction_ols, testset[!, target])
)
