# Tail of a replication loop whose `for (i in ...)` header lies above this
# excerpt: compute the LGS test-set RMSE, then store all six methods' RMSEs
# for replication i into the pre-allocated `errors` data frame.
Y_test_hat_LGS<-predict(model_LGS, newdata = X_test_df)
mse_LGS  <- mean((Y_test-Y_test_hat_LGS)^2)
rmse_LGS<-sqrt(mse_LGS)
# One column per method; row i is this replication's result.
errors$rmse_UNIF_kernel[i] <-rmse_UNIF_kernel
errors$rmse_L2MS[i]  <-rmse_L2MS
errors$rmse_LGMS[i]  <-rmse_LGMS
errors$rmse_UNIF[i] <- rmse_UNIF
errors$rmse_GMS[i]  <- rmse_GMS
errors$rmse_LGS[i]  <- rmse_LGS
}
# Summary of replication errors ----
# Per-method mean RMSE across the M replications, plus raw results on disk.
mean_errors <- colMeans(errors)
write.csv(errors, file = "errors_output.csv", row.names = FALSE)
# Per-method standard deviation of the RMSE (one sd per column).
sd_errors <- vapply(errors, sd, numeric(1))
# Report both statistics on the console.
cat("\n--- Error Summary (RMSE) ---\n")
cat("Mean RMSE for each method:\n")
print(mean_errors)
cat("\nStandard Deviation of RMSE for each method:\n")
print(sd_errors)
# Collect mean and sd into one readable table; row.names = NULL keeps the
# default row labels out so the Method column stands alone.
summary_statistics <- data.frame(
  Method = names(mean_errors),
  Mean_RMSE = mean_errors,
  SD_RMSE = sd_errors,
  row.names = NULL
)
print(summary_statistics)
# Persist the summary table alongside the raw errors.
write.csv(summary_statistics, file = "rmse_summary_statistics.csv", row.names = FALSE)
library(MASS)
library(glmnet)
library(Matrix)
library(foreach)
library(doParallel)
library(doRNG)
library(kernlab)
library(KRLS)
library(caret)
# Configuration ----
setwd("~/Desktop/Program/R_project/Markov_subsampling/kernel/realdata1")
source("function_linear.R")
set.seed(521)

burn <- 300   # burn-in length for the Markov samplers
q <- 0.2      # contamination fraction of the training set
d <- 1        # LGS tuning parameter
a <- b <- 1
c <- 0.01     # LGS tuning parameter (shadows base::c for non-call uses)
M <- 1        # number of Monte Carlo replications

data <- read.csv("nasdaq_processed_R.csv", header = TRUE)

# One column per subsampling method; row i will hold replication i's test RMSE.
errors <- data.frame(
  rmse_UNIF_kernel = numeric(M),
  rmse_L2MS = numeric(M),
  rmse_LGMS = numeric(M),
  rmse_UNIF = numeric(M),
  rmse_GMS = numeric(M),
  rmse_LGS = numeric(M)
)
N <- nrow(data)
# Monte Carlo loop: for each replication i, split the NASDAQ data
# chronologically, standardize, inject outlier contamination, then fit and
# evaluate six subsampling strategies by test-set RMSE:
# kernel-based (UNIF pilot / L2MS "2014" / LGMS "our method") and
# linear-model-based (UNIF / GMS / LGS).
for (i in 1:M) {
set.seed(i)
feature_names <- c(
#'Open', 'High', 'Low', 'Close', 'Volume',  # same-day market data
'InterestRate', 'ExchangeRate', 'VIX', 'TEDSpread', 'EFFR', 'Gold', 'Oil', # economic indicators
'Return_Lag_1', 'Return_Lag_2', 'Return_Lag_3', 'Return_Lag_4', 'Return_Lag_5', # lagged returns
#'Close_Lag_1', # previous day's closing price
'MA_5', 'MA_20' # moving averages
)
Y<-data.matrix(subset(data,select=Target_Return))
X<-data.matrix(subset(data,select=feature_names))
# 7. Data split (chronological order)
# Split into training (80%) and test (20%) sets
split_index <- floor(0.8 * nrow(X))
X_train <- X[1:split_index, ]
X_test <- X[(split_index + 1):nrow(X), ]
Y_train <- Y[1:split_index, , drop = FALSE]
# drop = FALSE keeps Y_test a one-column matrix; index corrected to start
# right after the training split
Y_test <- Y[(split_index + 1):nrow(Y), , drop = FALSE]
# 8. Scaling
# Compute mean and sd on the training set only (avoid test-set leakage)
train_mean <- apply(X_train, 2, mean)
train_sd <- apply(X_train, 2, sd)
# Standardize the training set
X_train<- scale(X_train, center = train_mean, scale = train_sd)
# Standardize the test set using the training-set statistics
X_test<- scale(X_test, center = train_mean, scale = train_sd)
################## Contamination ######################
p_val<-ncol(X_train)
N_train<- nrow(X_train)
W <- matrix(0, nrow = N_train, ncol = p_val)
O<-matrix(0,nrow=N_train,ncol=1)
# # Number of training rows to contaminate (fraction q)
num_filled_rows <- ceiling(q * N_train)
filled_rows <- sample(1:N_train, num_filled_rows)
##O1
# NOTE(review): these O1 draws are immediately overwritten by the O2 draws
# below, so they only consume RNG state; in the later copy of this script
# the O1 lines are commented out. Confirm which contamination scheme (and
# RNG stream) is intended here.
W[filled_rows, ] <- matrix(runif(num_filled_rows * p_val, 1,2), nrow = num_filled_rows, ncol = p_val)
O[filled_rows,]<-matrix(rnorm(num_filled_rows*1,mean=1,sd=sqrt(5)), nrow = num_filled_rows, ncol = 1)
#O2
#W[filled_rows, ] <- matrix(runif(num_filled_rows * p_val, 1,2), nrow = num_filled_rows, ncol = p_val)
#O[filled_rows,]<-matrix(rnorm(num_filled_rows*1,mean=0,sd=sqrt(5)), nrow = num_filled_rows, ncol = 1)
W[filled_rows, ] <- matrix(rnorm(num_filled_rows * p_val, -1,2), nrow = num_filled_rows, ncol = p_val)
O[filled_rows,]<- matrix(rnorm(num_filled_rows*1,mean=0,sd=sqrt(5)), nrow = num_filled_rows, ncol = 1)
X_train[filled_rows,]<-W[filled_rows, ]
Y_train[filled_rows,]<-O[filled_rows,]
######## Subsample sizes tried: 500, 1000, 1500
n_val<-1000
# Define the Gaussian (RBF) kernel
#estimated_sigma <- sigest(X_train, scaled = TRUE) # if the data are already standardized
#sigma<- as.numeric(estimated_sigma[2])
#rbf_kernel  <- rbfdot(sigma = sigma) # sigest returns several values; usually take the middle one
rbf_kernel  <- rbfdot(sigma =p_val)
####################### Uniform subsampling ###################
idx <- sample(1: N_train, n_val, replace = FALSE)
# Use the same indices to select matching X and Y rows
X_UNIF = X_train[idx, ]
Y_UNIF = Y_train[idx, ]
# Kernel matrix of the uniformly sampled training subset
K_UNIF<-kernelMatrix(rbf_kernel,X_UNIF)
# Fit the pilot model (kernel method)
fit_final_f0 <- krls(X = X_UNIF, y = Y_UNIF,derivative = FALSE, vcov= FALSE)
Yhat_train <- predict(fit_final_f0, newdata = X_train)$fit
## Absolute residuals of the pilot fit on the full training set
res<-abs(Y_train-Yhat_train)
## Squared residuals
res_2<-res^2
## Kernel matrix of the full training set against the pilot subset
K_train<-kernelMatrix(rbf_kernel,X_train,X_UNIF)
# Per-sample kernel norm: the L2 norm of each row of K_train
K_norms_train <-sqrt(rowSums( K_train^2))
rm(K_train) # free the kernel matrix
######################### "2014" method (L2MS) ############
# Sampling weights
G<-exp(res^2)
max_iter <- 1 # (number of iterations)
tolerance <- 0.001
iter <- 0
while(iter < max_iter) {
iter <- iter + 1
## Draw a subsample via small-gradient Markov sampling
L2MS_result<-LGMS(X_train,Y_train, G, 1, burn, n_val)
X_L2MS<-L2MS_result$X_selected
Y_L2MS<-L2MS_result$Y_selected
indices_L2MS<-L2MS_result$indices
# Fit the final model
fit_final_L2MS <- krls(X = X_L2MS, y = Y_L2MS,derivative=FALSE,vcov=FALSE)
}
############################## Our method (LGMS) ####################
# Sampling weights: residual scaled by the per-sample kernel norm
G_LGMS<-res/K_norms_train
max_iter <- 5 # (number of iterations)
tolerance <- 0.001
iter <- 0
diff_norm<-Inf
while(iter < max_iter & diff_norm > tolerance) {
iter <- iter + 1
## Draw a subsample via small-gradient Markov sampling
LGMS_result<-LGMS(X_train,Y_train, G_LGMS, 1, burn, n_val)
X_LGMS<-LGMS_result$X_selected
Y_LGMS<-LGMS_result$Y_selected
indices_LGMS<-LGMS_result$indices
# Fit the final model
fit_final_LGMS<-krls(X = X_LGMS, y = Y_LGMS,derivative=FALSE,vcov=FALSE)
# Update the training-set kernel matrix and its row norms
K_train<-kernelMatrix(rbf_kernel,X_train,X_LGMS)
K_norms_train<-sqrt(rowSums( K_train^2))
rm(K_train)
# Predict the training set with the model fitted on the selected samples
Yhat_train_new <- predict(fit_final_LGMS, newdata =X_train)$fit
# Update residuals
res<-abs(Y_train-Yhat_train_new)
# Update sampling weights
G_LGMS<-res/K_norms_train
# Stopping criterion
# NOTE(review): Yhat_train is never updated inside this loop, so diff_norm
# always compares against the pilot fit rather than the previous iterate --
# confirm whether Yhat_train should be set to Yhat_train_new each pass.
diff_norm<- mean((Yhat_train-Yhat_train_new)^2)
}
#################### Linear-model methods #####################
###### Fit a linear model on the uniform subsample to get the initial beta
# Combine Y and X into one data frame
train_UNIF <- data.frame(Y = Y_UNIF,  X_UNIF )
# 2. Fit the linear regression model
model_UNIF <- lm(Y ~ ., data = train_UNIF)
X_train_df<- as.data.frame(X_train)
Y_train_hat_UNIF <- predict(model_UNIF, newdata = X_train_df)
########## GMS ########
# Compute gradients of the current fit
G <- gradient(X_train, Y_train, Y_train_hat_UNIF, p_val)$G
GMS_result <- GMS(X_train, Y_train, G, 1,burn, n_val)
X_GMS <- GMS_result$X_selected
Y_GMS <- GMS_result$Y_selected
train_GMS <- data.frame(Y = Y_GMS,  X_GMS )
model_GMS <- lm(Y ~ ., data = train_GMS)
########## LGS  #############
iter <- 0
diff_norm <- Inf
max_iter <- 5
tolerance <- 0.0001
Y_hat_prev_LGS <-Y_train_hat_UNIF# fitted values from the previous iteration
nu <- rep(0, p_val)
Gradient_result <- gradient( X_train, Y_train, Y_hat_prev_LGS, p_val)
G<-Gradient_result$G
while(iter < max_iter & diff_norm > tolerance) {
iter <- iter + 1
# LGS
LGS_result <- LGS(X_train, Y_train, G, 0,  n_val,N_train)
X_LGS <- LGS_result$X_selected
Y_LGS <- LGS_result$Y_selected
train_LGS <- data.frame(Y = Y_LGS,  X_LGS )
model_LGS<- lm(Y ~ ., data = train_LGS)
Y_hat_new_LGS<-predict(model_LGS, newdata = X_train_df)
# Update the gradient matrix
Gradient_result <- gradient(X_train, Y_train, Y_hat_new_LGS, p_val)
G<-Gradient_result$G
G_matrix<-Gradient_result$G_matrix
# Column means of the gradient matrix
mu <- G_matrix |> apply(2, mean)
# Update nu
# NOTE(review): nu is computed but not used again inside this loop --
# confirm whether it should feed back into LGS.
nu <- c * (iter / log(1+abs(mu)))^d
# Mean squared difference between successive fitted values
diff_norm <- mean((Y_hat_prev_LGS - Y_hat_new_LGS)^2)
# Carry the estimate forward
Y_hat_prev_LGS <-Y_hat_new_LGS
}
# Kernel method, uniform pilot: test-set RMSE
Y_test_hat_UNIF_kernel<-predict(fit_final_f0, newdata = X_test)$fit
mse_UNIF_kernel  <- mean((Y_test-Y_test_hat_UNIF_kernel)^2)
rmse_UNIF_kernel <-sqrt(mse_UNIF_kernel)
# Kernel method, "2014" L2MS: test-set RMSE
Y_test_hat_L2MS<-predict(fit_final_L2MS, newdata = X_test)$fit
mse_L2MS  <- mean((Y_test-Y_test_hat_L2MS)^2)
rmse_L2MS  <- sqrt( mse_L2MS)
# Our method (LGMS) after the iteration stops: test-set RMSE
Y_test_hat_LGMS<-predict(fit_final_LGMS, newdata = X_test)$fit
mse_LGMS  <- mean((Y_test-Y_test_hat_LGMS)^2)
rmse_LGMS<-sqrt(mse_LGMS)
############## Linear-model results #########
X_test_df<-as.data.frame(X_test)
# Uniform subsample: test-set RMSE
Y_test_hat_UNIF<-predict(model_UNIF, newdata = X_test_df)
mse_UNIF<-mean((Y_test-Y_test_hat_UNIF)^2)
rmse_UNIF<-sqrt(mse_UNIF)
# GMS: test-set RMSE
Y_test_hat_GMS<-predict(model_GMS, newdata = X_test_df)
mse_GMS  <- mean((Y_test-Y_test_hat_GMS)^2)
rmse_GMS<-sqrt(mse_GMS)
# LGS: test-set RMSE
Y_test_hat_LGS<-predict(model_LGS, newdata = X_test_df)
mse_LGS  <- mean((Y_test-Y_test_hat_LGS)^2)
rmse_LGS<-sqrt(mse_LGS)
# Record this replication's six RMSEs
errors$rmse_UNIF_kernel[i] <-rmse_UNIF_kernel
errors$rmse_L2MS[i]  <-rmse_L2MS
errors$rmse_LGMS[i]  <-rmse_LGMS
errors$rmse_UNIF[i] <- rmse_UNIF
errors$rmse_GMS[i]  <- rmse_GMS
errors$rmse_LGS[i]  <- rmse_LGS
}
# Error summary ----
# Column-wise mean RMSE over all replications; dump the raw errors to CSV.
mean_errors <- colMeans(errors)
write.csv(errors, file = "errors_output.csv", row.names = FALSE)
# Column-wise standard deviation of the RMSE.
sd_errors <- vapply(errors, sd, numeric(1))
# Console report of means and standard deviations.
cat("\n--- Error Summary (RMSE) ---\n")
cat("Mean RMSE for each method:\n")
print(mean_errors)
cat("\nStandard Deviation of RMSE for each method:\n")
print(sd_errors)
# Build a tidy summary table (row.names = NULL drops the default row labels
# so the Method column is the identifier).
summary_statistics <- data.frame(
  Method = names(mean_errors),
  Mean_RMSE = mean_errors,
  SD_RMSE = sd_errors,
  row.names = NULL
)
print(summary_statistics)
# Save the summary table to its own CSV file.
write.csv(summary_statistics, file = "rmse_summary_statistics.csv", row.names = FALSE)
library(MASS)
library(glmnet)
library(Matrix)
library(foreach)
library(doParallel)
library(doRNG)
library(kernlab)
library(KRLS)
library(caret)
# Configuration ----
# NOTE(review): this configuration block (and the experiment that follows)
# repeats content appearing earlier in the file; the script appears to be
# duplicated -- confirm whether re-running everything is intentional.
setwd("~/Desktop/Program/R_project/Markov_subsampling/kernel/realdata1")
source("function_linear.R")
set.seed(521)

burn <- 300   # burn-in length for the Markov samplers
q <- 0.2      # contamination fraction of the training set
d <- 1        # LGS tuning parameter
a <- b <- 1
c <- 0.01     # LGS tuning parameter (shadows base::c for non-call uses)
M <- 1        # number of Monte Carlo replications

data <- read.csv("nasdaq_processed_R.csv", header = TRUE)

# Pre-allocate one RMSE column per subsampling method, M rows.
errors <- data.frame(
  rmse_UNIF_kernel = numeric(M),
  rmse_L2MS = numeric(M),
  rmse_LGMS = numeric(M),
  rmse_UNIF = numeric(M),
  rmse_GMS = numeric(M),
  rmse_LGS = numeric(M)
)
N <- nrow(data)
# Monte Carlo loop (second copy; here the O1 contamination lines are
# commented out, so only the O2 scheme is active): for each replication i,
# split the NASDAQ data chronologically, standardize, inject outlier
# contamination, then fit and evaluate six subsampling strategies by
# test-set RMSE: kernel-based (UNIF pilot / L2MS "2014" / LGMS "our method")
# and linear-model-based (UNIF / GMS / LGS).
for (i in 1:M) {
set.seed(i)
feature_names <- c(
#'Open', 'High', 'Low', 'Close', 'Volume',  # same-day market data
'InterestRate', 'ExchangeRate', 'VIX', 'TEDSpread', 'EFFR', 'Gold', 'Oil', # economic indicators
'Return_Lag_1', 'Return_Lag_2', 'Return_Lag_3', 'Return_Lag_4', 'Return_Lag_5', # lagged returns
#'Close_Lag_1', # previous day's closing price
'MA_5', 'MA_20' # moving averages
)
Y<-data.matrix(subset(data,select=Target_Return))
X<-data.matrix(subset(data,select=feature_names))
# 7. Data split (chronological order)
# Split into training (80%) and test (20%) sets
split_index <- floor(0.8 * nrow(X))
X_train <- X[1:split_index, ]
X_test <- X[(split_index + 1):nrow(X), ]
Y_train <- Y[1:split_index, , drop = FALSE]
# drop = FALSE keeps Y_test a one-column matrix; index corrected to start
# right after the training split
Y_test <- Y[(split_index + 1):nrow(Y), , drop = FALSE]
# 8. Scaling
# Compute mean and sd on the training set only (avoid test-set leakage)
train_mean <- apply(X_train, 2, mean)
train_sd <- apply(X_train, 2, sd)
# Standardize the training set
X_train<- scale(X_train, center = train_mean, scale = train_sd)
# Standardize the test set using the training-set statistics
X_test<- scale(X_test, center = train_mean, scale = train_sd)
################## Contamination ######################
p_val<-ncol(X_train)
N_train<- nrow(X_train)
W <- matrix(0, nrow = N_train, ncol = p_val)
O<-matrix(0,nrow=N_train,ncol=1)
# # Number of training rows to contaminate (fraction q)
num_filled_rows <- ceiling(q * N_train)
filled_rows <- sample(1:N_train, num_filled_rows)
##O1 (alternative contamination scheme, disabled)
#W[filled_rows, ] <- matrix(runif(num_filled_rows * p_val, 1,2), nrow = num_filled_rows, ncol = p_val)
#O[filled_rows,]<-matrix(rnorm(num_filled_rows*1,mean=1,sd=sqrt(5)), nrow = num_filled_rows, ncol = 1)
#O2 (active scheme: Gaussian outliers in X and Y)
W[filled_rows, ] <- matrix(rnorm(num_filled_rows * p_val, -1,2), nrow = num_filled_rows, ncol = p_val)
O[filled_rows,]<- matrix(rnorm(num_filled_rows*1,mean=0,sd=sqrt(5)), nrow = num_filled_rows, ncol = 1)
X_train[filled_rows,]<-W[filled_rows, ]
Y_train[filled_rows,]<-O[filled_rows,]
######## Subsample sizes tried: 500, 1000, 1500
n_val<-1000
# Define the Gaussian (RBF) kernel
#estimated_sigma <- sigest(X_train, scaled = TRUE) # if the data are already standardized
#sigma<- as.numeric(estimated_sigma[2])
#rbf_kernel  <- rbfdot(sigma = sigma) # sigest returns several values; usually take the middle one
rbf_kernel  <- rbfdot(sigma =p_val)
####################### Uniform subsampling ###################
idx <- sample(1: N_train, n_val, replace = FALSE)
# Use the same indices to select matching X and Y rows
X_UNIF = X_train[idx, ]
Y_UNIF = Y_train[idx, ]
# Kernel matrix of the uniformly sampled training subset
K_UNIF<-kernelMatrix(rbf_kernel,X_UNIF)
# Fit the pilot model (kernel method)
fit_final_f0 <- krls(X = X_UNIF, y = Y_UNIF,derivative = FALSE, vcov= FALSE)
Yhat_train <- predict(fit_final_f0, newdata = X_train)$fit
## Absolute residuals of the pilot fit on the full training set
res<-abs(Y_train-Yhat_train)
## Squared residuals
res_2<-res^2
## Kernel matrix of the full training set against the pilot subset
K_train<-kernelMatrix(rbf_kernel,X_train,X_UNIF)
# Per-sample kernel norm: the L2 norm of each row of K_train
K_norms_train <-sqrt(rowSums( K_train^2))
rm(K_train) # free the kernel matrix
######################### "2014" method (L2MS) ############
# Sampling weights
G<-exp(res^2)
max_iter <- 1 # (number of iterations)
tolerance <- 0.001
iter <- 0
while(iter < max_iter) {
iter <- iter + 1
## Draw a subsample via small-gradient Markov sampling
L2MS_result<-LGMS(X_train,Y_train, G, 1, burn, n_val)
X_L2MS<-L2MS_result$X_selected
Y_L2MS<-L2MS_result$Y_selected
indices_L2MS<-L2MS_result$indices
# Fit the final model
fit_final_L2MS <- krls(X = X_L2MS, y = Y_L2MS,derivative=FALSE,vcov=FALSE)
}
############################## Our method (LGMS) ####################
# Sampling weights: residual scaled by the per-sample kernel norm
G_LGMS<-res/K_norms_train
max_iter <- 5 # (number of iterations)
tolerance <- 0.001
iter <- 0
diff_norm<-Inf
while(iter < max_iter & diff_norm > tolerance) {
iter <- iter + 1
## Draw a subsample via small-gradient Markov sampling
LGMS_result<-LGMS(X_train,Y_train, G_LGMS, 1, burn, n_val)
X_LGMS<-LGMS_result$X_selected
Y_LGMS<-LGMS_result$Y_selected
indices_LGMS<-LGMS_result$indices
# Fit the final model
fit_final_LGMS<-krls(X = X_LGMS, y = Y_LGMS,derivative=FALSE,vcov=FALSE)
# Update the training-set kernel matrix and its row norms
K_train<-kernelMatrix(rbf_kernel,X_train,X_LGMS)
K_norms_train<-sqrt(rowSums( K_train^2))
rm(K_train)
# Predict the training set with the model fitted on the selected samples
Yhat_train_new <- predict(fit_final_LGMS, newdata =X_train)$fit
# Update residuals
res<-abs(Y_train-Yhat_train_new)
# Update sampling weights
G_LGMS<-res/K_norms_train
# Stopping criterion
# NOTE(review): Yhat_train is never updated inside this loop, so diff_norm
# always compares against the pilot fit rather than the previous iterate --
# confirm whether Yhat_train should be set to Yhat_train_new each pass.
diff_norm<- mean((Yhat_train-Yhat_train_new)^2)
}
#################### Linear-model methods #####################
###### Fit a linear model on the uniform subsample to get the initial beta
# Combine Y and X into one data frame
train_UNIF <- data.frame(Y = Y_UNIF,  X_UNIF )
# 2. Fit the linear regression model
model_UNIF <- lm(Y ~ ., data = train_UNIF)
X_train_df<- as.data.frame(X_train)
Y_train_hat_UNIF <- predict(model_UNIF, newdata = X_train_df)
########## GMS ########
# Compute gradients of the current fit
G <- gradient(X_train, Y_train, Y_train_hat_UNIF, p_val)$G
GMS_result <- GMS(X_train, Y_train, G, 1,burn, n_val)
X_GMS <- GMS_result$X_selected
Y_GMS <- GMS_result$Y_selected
train_GMS <- data.frame(Y = Y_GMS,  X_GMS )
model_GMS <- lm(Y ~ ., data = train_GMS)
########## LGS  #############
iter <- 0
diff_norm <- Inf
max_iter <- 5
tolerance <- 0.0001
Y_hat_prev_LGS <-Y_train_hat_UNIF# fitted values from the previous iteration
nu <- rep(0, p_val)
Gradient_result <- gradient( X_train, Y_train, Y_hat_prev_LGS, p_val)
G<-Gradient_result$G
while(iter < max_iter & diff_norm > tolerance) {
iter <- iter + 1
# LGS
LGS_result <- LGS(X_train, Y_train, G, 0,  n_val,N_train)
X_LGS <- LGS_result$X_selected
Y_LGS <- LGS_result$Y_selected
train_LGS <- data.frame(Y = Y_LGS,  X_LGS )
model_LGS<- lm(Y ~ ., data = train_LGS)
Y_hat_new_LGS<-predict(model_LGS, newdata = X_train_df)
# Update the gradient matrix
Gradient_result <- gradient(X_train, Y_train, Y_hat_new_LGS, p_val)
G<-Gradient_result$G
G_matrix<-Gradient_result$G_matrix
# Column means of the gradient matrix
mu <- G_matrix |> apply(2, mean)
# Update nu
# NOTE(review): nu is computed but not used again inside this loop --
# confirm whether it should feed back into LGS.
nu <- c * (iter / log(1+abs(mu)))^d
# Mean squared difference between successive fitted values
diff_norm <- mean((Y_hat_prev_LGS - Y_hat_new_LGS)^2)
# Carry the estimate forward
Y_hat_prev_LGS <-Y_hat_new_LGS
}
# Kernel method, uniform pilot: test-set RMSE
Y_test_hat_UNIF_kernel<-predict(fit_final_f0, newdata = X_test)$fit
mse_UNIF_kernel  <- mean((Y_test-Y_test_hat_UNIF_kernel)^2)
rmse_UNIF_kernel <-sqrt(mse_UNIF_kernel)
# Kernel method, "2014" L2MS: test-set RMSE
Y_test_hat_L2MS<-predict(fit_final_L2MS, newdata = X_test)$fit
mse_L2MS  <- mean((Y_test-Y_test_hat_L2MS)^2)
rmse_L2MS  <- sqrt( mse_L2MS)
# Our method (LGMS) after the iteration stops: test-set RMSE
Y_test_hat_LGMS<-predict(fit_final_LGMS, newdata = X_test)$fit
mse_LGMS  <- mean((Y_test-Y_test_hat_LGMS)^2)
rmse_LGMS<-sqrt(mse_LGMS)
############## Linear-model results #########
X_test_df<-as.data.frame(X_test)
# Uniform subsample: test-set RMSE
Y_test_hat_UNIF<-predict(model_UNIF, newdata = X_test_df)
mse_UNIF<-mean((Y_test-Y_test_hat_UNIF)^2)
rmse_UNIF<-sqrt(mse_UNIF)
# GMS: test-set RMSE
Y_test_hat_GMS<-predict(model_GMS, newdata = X_test_df)
mse_GMS  <- mean((Y_test-Y_test_hat_GMS)^2)
rmse_GMS<-sqrt(mse_GMS)
# LGS: test-set RMSE
Y_test_hat_LGS<-predict(model_LGS, newdata = X_test_df)
mse_LGS  <- mean((Y_test-Y_test_hat_LGS)^2)
rmse_LGS<-sqrt(mse_LGS)
# Record this replication's six RMSEs
errors$rmse_UNIF_kernel[i] <-rmse_UNIF_kernel
errors$rmse_L2MS[i]  <-rmse_L2MS
errors$rmse_LGMS[i]  <-rmse_LGMS
errors$rmse_UNIF[i] <- rmse_UNIF
errors$rmse_GMS[i]  <- rmse_GMS
errors$rmse_LGS[i]  <- rmse_LGS
}
# Final error summary ----
# Average RMSE per method over the M replications; raw errors go to CSV.
mean_errors <- colMeans(errors)
write.csv(errors, file = "errors_output.csv", row.names = FALSE)
# Standard deviation of each method's RMSE column.
sd_errors <- vapply(errors, sd, numeric(1))
# Print both statistics to the console.
cat("\n--- Error Summary (RMSE) ---\n")
cat("Mean RMSE for each method:\n")
print(mean_errors)
cat("\nStandard Deviation of RMSE for each method:\n")
print(sd_errors)
# Assemble a single summary table; row.names = NULL suppresses the default
# row labels so Method is the sole identifier column.
summary_statistics <- data.frame(
  Method = names(mean_errors),
  Mean_RMSE = mean_errors,
  SD_RMSE = sd_errors,
  row.names = NULL
)
print(summary_statistics)
# Write the summary table to disk.
write.csv(summary_statistics, file = "rmse_summary_statistics.csv", row.names = FALSE)
