#==============================================================================
# Scaled Vecchia approximation for the 8-D CA housing data (8d_CAhousing)
# Mark Risser
# Lawrence Berkeley National Laboratory
# May, 2025
#==============================================================================

# Fix the RNG seed so that any random draws made later in the script are
# repeatable from run to run.
set.seed(0)

# Scaled vecchia - https://arxiv.org/abs/2005.00386
# https://doi.org/10.1137/20M1352156
# NOTE(review): this runs code fetched from the `master` branch at run time,
# so the script needs network access and is not pinned to a fixed revision --
# consider sourcing a specific commit or a vendored copy.
# Presumably this file defines fit_scaled() and predictions_scaled(), which
# are called further down -- confirm.
source('https://raw.githubusercontent.com/katzfuss-group/scaledVecchia/master/vecchia_scaled.R')

# Setup / load data
# All four files are read without a header row, so read.csv() names the
# columns V1, V2, ...

# Responses: only the first column (V1) is kept, as a plain vector.
y <- read.csv(
  "8d_CAhousing/y_train_CAhousing.csv",
  header = FALSE
)[["V1"]]
ytest <- read.csv(
  "8d_CAhousing/y_test_CAhousing.csv",
  header = FALSE
)[["V1"]]

# Inputs: space-separated files, converted to matrices (one row per
# observation; presumably 8 input columns, per the file header -- confirm).
x <- as.matrix(
  read.csv(
    "8d_CAhousing/x_train_CAhousing.csv",
    header = FALSE,
    sep = " "
  )
)
xtest <- as.matrix(
  read.csv(
    "8d_CAhousing/x_test_CAhousing.csv",
    header = FALSE,
    sep = " "
  )
)

# Training
# Settings passed explicitly to fit_scaled():
#   ms = 30
#   nu = 4.5
#   nug = 0.001 (noise variance)
#   n.est = nrow(x), i.e. the full training-set size
#   print.level = 0
# `trend` is not passed, so the function's own default applies (per the
# original note: "pre", i.e. subtract the sample mean as a preprocessing
# step -- confirm against the sourced fit_scaled()).
prt <- proc.time()
fit <- fit_scaled(
  y = y,
  inputs = x,
  ms = 30,
  nu = 4.5,
  nug = 0.001,
  n.est = nrow(x),
  print.level = 0
)
# Report elapsed (wall-clock) seconds since the timer was started.
print(paste0("Training time: ", (proc.time() - prt)[["elapsed"]], " seconds"))
print(summary(fit))

# Prediction at the test inputs; nsims = 200 requests simulated draws, which
# are read back later from preds$samples.
prt <- proc.time()
preds <- predictions_scaled(
  fit = fit,
  locs_pred = xtest,
  nsims = 200
)
print(paste0("Prediction time for test points: ", (proc.time() - prt)[["elapsed"]], " seconds"))

# RMSE of the predicted means against the held-out responses, rounded to
# four decimal places for printing.
print(paste0(
  "RMSE: ",
  round(sqrt(mean((ytest - preds$means)^2)), digits = 4)
))

# Write results to CSV
# Per-test-point spread: standard deviation across each row of the samples.
ypred_se <- apply(X = preds$samples, MARGIN = 1, FUN = sd)

# NOTE(review): the `y_test` column holds the *predicted* means
# (preds$means), not the observed test responses in `ytest`. The name is kept
# as-is because downstream consumers may depend on this column layout --
# confirm before renaming.
results_CAhousing <- data.frame(
  x_test = xtest,
  y_test = preds$means,
  y_standard_dev = ypred_se
)
write.csv(
  results_CAhousing,
  file = "vecchia_CAhousing_results_final.csv",
  row.names = FALSE
)