#==============================================================================
# Vecchia for 3D temperature data
# Mark Risser
# Lawrence Berkeley National Laboratory
# May, 2025
#==============================================================================

# Fix the RNG seed up front so repeated runs of this script are reproducible.
set.seed(0)

# Scaled Vecchia GP approximation; presumably the file that defines
# fit_scaled() and predictions_scaled(), which the script calls later on.
#   Preprint: https://arxiv.org/abs/2005.00386
#   Journal:  https://doi.org/10.1137/20M1352156
# NOTE(review): this pulls the tip of the `master` branch over the network at
# run time, so behavior can change without this script changing; consider
# pinning a commit or vendoring the file.
source("https://raw.githubusercontent.com/katzfuss-group/scaledVecchia/master/vecchia_scaled.R")

# Load the train/test split from the 3d_climate/ directory.
# Responses are read with the default comma separator and taken from the
# first column (V1); inputs are space-separated and converted to matrices.
# None of the files has a header row.
y <- read.csv("3d_climate/y_train_3dclimate.csv", header = FALSE)$V1
ytest <- read.csv("3d_climate/y_test_3dclimate.csv", header = FALSE)$V1
x <- as.matrix(
  read.csv("3d_climate/x_train_3dclimate.csv", header = FALSE, sep = " ")
)
xtest <- as.matrix(
  read.csv("3d_climate/x_test_3dclimate.csv", header = FALSE, sep = " ")
)

# Fail fast on inconsistent inputs. Without these checks a mismatch only
# surfaces after the (potentially long) fit, or not at all: arithmetic on
# vectors of different lengths recycles the shorter one.
stopifnot(
  length(y) == nrow(x),
  length(ytest) == nrow(xtest),
  ncol(x) == ncol(xtest)
)

# Training
# Settings passed explicitly to fit_scaled():
#   ms = 10, nu = 4.5, n.est = nrow(x) (all training rows), print.level = 2
#   nug = NULL
# `trend` is not passed, so fit_scaled()'s own default applies (earlier notes
# in this script describe it as "pre": subtract the sample mean as a
# preprocessing step -- confirm against the sourced file).
# NOTE(review): earlier notes also listed a noise variance of 0.001, but the
# call passes nug = NULL; in scaledVecchia a NULL nugget appears to mean
# "estimate the noise variance" -- confirm which is intended.
fit_start <- proc.time()
fit <- fit_scaled(
  y = y,
  inputs = x,
  ms = 10,
  nug = NULL,
  nu = 4.5,
  n.est = nrow(x),
  print.level = 2
)
# Wall-clock seconds elapsed since fit_start.
fit_elapsed <- (proc.time() - fit_start)[["elapsed"]]
print(paste0("Training time: ", fit_elapsed, " seconds"))
print(summary(fit))
# Persist the fitted model so prediction can be redone without refitting.
save(fit, file = "fit_3d.RData")

# Prediction at the test inputs, requesting 200 simulations (nsims = 200).
pred_start <- proc.time()
preds <- predictions_scaled(fit = fit, locs_pred = xtest, nsims = 200)
pred_elapsed <- (proc.time() - pred_start)[["elapsed"]]
print(paste0("Prediction time for test points: ", pred_elapsed, " seconds"))
save(preds, file = "preds_3d.RData")

# RMSE: root-mean-squared difference between the held-out responses and the
# predicted means, reported to 4 decimal places.
pred_error <- ytest - preds$means
rmse <- sqrt(mean(pred_error^2))
print(paste0("RMSE: ", round(rmse, 4)))

# Write results to CSV

# Per-row spread of the simulated predictions: the sample standard deviation
# across each row of preds$samples (assumes rows index test points and
# columns index the simulated draws -- TODO confirm against
# predictions_scaled()).
pred_sd <- apply(preds$samples, 1, sd)

# Output column names are kept unchanged so existing consumers of the CSV
# keep working.
# NOTE(review): the `y_test` column holds the predicted means (preds$means),
# not the observed test responses.
results_3d <- data.frame(
  x_test = xtest,
  y_test = preds$means,
  y_standard_dev = pred_sd
)
write.csv(results_3d, file = "vecchia_3d_results_final.csv", row.names = FALSE)