#==============================================================================
# Vecchia for 2D topography data
# Mark Risser
# Lawrence Berkeley National Laboratory
# May, 2025
#==============================================================================

# Seed the RNG so that random draws made later in the script are reproducible.
set.seed(0)

# Setup / load data
library(GpGp)

# Responses: files without a header row; the values are taken from column V1.
y <- read.csv("2d_topo/y_train_2dtopo.csv", header = FALSE)$V1
ytest <- read.csv("2d_topo/y_test_2dtopo.csv", header = FALSE)$V1

# Inputs: space-delimited files without a header row, converted to matrices.
x <- as.matrix(read.csv("2d_topo/x_train_2dtopo.csv", header = FALSE, sep = " "))
xtest <- as.matrix(read.csv("2d_topo/x_test_2dtopo.csv", header = FALSE, sep = " "))

# Fail fast on a mis-parsed or mismatched file. Without these checks a wrong
# delimiter or a length mismatch would only surface later as an obscure error,
# or as silently recycled values in vectorized arithmetic.
stopifnot(
  "y_train must be numeric" = is.numeric(y),
  "y_test must be numeric" = is.numeric(ytest),
  "x_train must parse as a numeric matrix" = is.numeric(x),
  "x_test must parse as a numeric matrix" = is.numeric(xtest),
  "x_train and y_train must have the same number of rows" =
    nrow(x) == length(y),
  "x_test and y_test must have the same number of rows" =
    nrow(xtest) == length(ytest),
  "x_train and x_test must have the same number of columns" =
    ncol(x) == ncol(xtest)
)

# Training
# Note: 'fixed_parms = c(3, 4)' holds the 3rd and 4th entries of 'start_parms'
#       at their starting values (10 and 1) rather than estimating them.
#       NOTE(review): for the covariance function GpGp uses when none is
#       named, these are presumably the Matern smoothness and the nugget
#       (noise) term -- confirm against the GpGp documentation.
train_start <- proc.time()
fit <- fit_model(
  y = y,
  locs = x,
  start_parms = c(1, 0.1, 10, 1),
  m_seq = 30,
  fixed_parms = c(3, 4),
  silent = TRUE
)
# Wall-clock ("elapsed") seconds spent in the fit.
train_elapsed <- (proc.time() - train_start)[["elapsed"]]
print(paste0("Training time: ", train_elapsed, " seconds"))
print(summary(fit))
# Prediction at the test locations; the timer covers both the point
# predictions and the conditional simulations used for the spread estimate.
prt <- proc.time()

# Single column of ones as the design matrix for the test points, built once
# and passed to both calls.
X_test_intercept <- matrix(1, ncol = 1, nrow = nrow(xtest))

ypred <- predictions(fit = fit, locs_pred = xtest, X_pred = X_test_intercept)
ypred_condsim <- cond_sim(
  fit,
  locs_pred = xtest,
  X_pred = X_test_intercept,
  nsims = 200
)

# One row of simulations per test point is required; otherwise the vectorized
# subtraction below would recycle 'ypred' incorrectly.
stopifnot(nrow(ypred_condsim) == length(ypred))

# Per-point spread: root-mean-square deviation of the simulated values around
# the point prediction. Subtracting a length-n vector from an n-row matrix
# recycles down the columns, so every simulation column is centred on 'ypred'.
ypred_se <- sqrt(rowMeans((ypred_condsim - as.vector(ypred))^2))

print(paste0("Prediction time for test points: ", (proc.time() - prt)[3], " seconds"))

# RMSE of the point predictions against the held-out responses,
# reported to four decimal places.
squared_error <- (ypred - ytest)^2
rmse <- sqrt(mean(squared_error))
print(paste0("RMSE: ", round(rmse, 4)))

# Write results to CSV
# Note: the 'y_test' column holds the model predictions ('ypred'), not the
#       held-out observations; 'y_standard_dev' holds the simulation-based
#       spread ('ypred_se'). The test inputs are included under 'x_test'.
results_2d <- data.frame(
  x_test = xtest,
  y_test = ypred,
  y_standard_dev = ypred_se
)
write.csv(
  results_2d,
  file = "vecchia_2d_results_final.csv",
  row.names = FALSE
)