# NOTE(review): Everything from here down to the "# End of q loop" marker just
# before the library() calls is a TRUNCATED DUPLICATE of the main script body
# that appears later in this file. It starts mid-loop (its closing braces match
# no visible opening), and it references functions/variables that are defined
# only after the library()/source() section below (LGMS, res0, K_train,
# filled_rows, ...), so it errors immediately if the file is run top-to-bottom.
# It looks like an accidental paste and should be deleted; the complete copy of
# this logic lives further down in the file.
K_norms_train <-sqrt(rowSums( K_train^2))
rm(K_train) # free the kernel matrix
#########################2014############
# sampling scores
G<-exp(res0^2)
max_iter <- 1 # (number of iterations)
tolerance <- 0.001
iter <- 0
while(iter < max_iter) {
iter <- iter + 1
## 开始抽样 小梯度markov
L2MS_result<-LGMS(X_train,Y_train, G, 1, burn, n_val)
X_L2MS<-L2MS_result$X_selected
Y_L2MS<-L2MS_result$Y_selected
indices_L2MS<-L2MS_result$indices
#sigma_est_L2MS <- sigest(X_L2MS)  # 返回 c(min, med, max)
#sigma_opt_L2MS <- as.numeric(sigma_est_L2MS[2])
# 拟合最终模型
fit_final_L2MS <- krls(X = X_L2MS, y = Y_L2MS,derivative=FALSE,vcov= TRUE)
ps_L2MS <- intersect(indices_L2MS, filled_rows )
psr_L2MS <- 1-length(ps_L2MS)/n_val
}
##############################我们的方法####################
G_KRMS<-res0/K_norms_train
max_iter <- 15#（迭代次数）
tolerance <- 0.001
iter <- 0
diff_norm<-Inf
fit_final_KRMS <- NULL
while(iter < max_iter & diff_norm > tolerance) {
iter<- iter + 1
#cat(sprintf("Attempting LGMS (Our Method) iteration %d for q=%.1f, n_val=%d\n", iter_lgms, q, n_val))
# 使用 tryCatch 来包裹可能出错的代码块
# possible_outcome 会存储 tryCatch 块的返回值
possible_outcome <- tryCatch({
# 1. 进行LGMS抽样
KRMS_result <- LGMS(X_train, Y_train, G_KRMS, 1, burn, n_val)
X_KRMS <- KRMS_result$X_selected
Y_KRMS <- KRMS_result$Y_selected
# 2. 在调用krls之前，主动检查 Y_LGMS_iter 是否为常数
if (length(unique(Y_KRMS)) < 2) {
# 如果是常数，记录警告并主动抛出一个特定错误，会被tryCatch捕获
warning(sprintf("KRMS (Our Method) iter %d for q=%.1f, n_val=%d: Y_KRMS is constant. Will use previous estimate.", iter, q, n_val))
stop("Y_KRMS_is_constant_in_iter") # 自定义错误信息
}
# 3. 如果 Y_LGMS_iter 不是常数，拟合krls模型
current_fit_KRMS <- krls(X = X_KRMS, y = Y_KRMS, derivative=FALSE, vcov=FALSE)
# 4. 如果成功，返回包含模型和所用X子样的列表
list(success = TRUE, fit = current_fit_KRMS, X_subsample = X_KRMS)
}, error = function(e) {
# 5. 如果发生任何错误 (包括我们主动抛出的 "Y_KRMS_is_constant_in_iter")
warning(sprintf("Error or Y_KRMS constant in KRMS (Our Method) iter %d for q=%.1f, n_val=%d: %s. Using previous estimate for KRMS method.", iter, q, n_val, e$message))
# 返回一个表示失败的列表
list(success = FALSE, error_message = e$message)
})
# 根据 tryCatch 的结果进行处理
if (possible_outcome$success) {
# 本次迭代成功
fit_final_KRMS <- possible_outcome$fit # 更新最终的KRMS模型
X_KRMS <- possible_outcome$X_subsample # 获取本次成功的X子样本
# 更新下一次迭代所需的 K_norms_train_current, res_current, G_LGMS_current, diff_norm_lgms
K_train<- kernelMatrix(rbf_kernel, X_train, X_KRMS)
K_norms_train <- sqrt(rowSums(K_train^2))
K_norms_train[K_norms_train == 0] <- 1e-9 # 避免除以零
rm(K_train)
Yhat_train_new <- predict(fit_final_KRMS, newdata = X_train)$fit
res<- abs(Y_train - Yhat_train_new)
G_LGMS<- res / K_norms_train
# diff_norm_lgms 比较的是当前LGMS模型与最初UNIF_kernel模型在训练集上的预测差异
diff_norm <- mean((Yhat_train - Yhat_train_new)^2)
} else {
# 本次迭代失败 (Y_LGMS是常数或krls其他错误)
# 根据你的要求，我们需要停止这个 while 循环，并使用上一次的估计
# fit_final_LGMS 此时仍然是上一次成功迭代的结果 (或者最初的 fit_final_f0)
cat(sprintf("KRMS (Our Method) while loop stopped at iteration %d for q=%.1f, n_val=%d due to error/constant Y. Final LGMS model is from previous successful state.\n", iter, q, n_val))
break # 跳出 while 循环
}
}
####################线性方法#####################
######用线性回归模型拟合均匀抽样的结果来获得初始beta
#合并Y和X的数据
#train_UNIF <- data.frame(Y = Y_UNIF,  X_UNIF )
# 2. 拟合线性回归模型
# model_UNIF <- lm(Y ~ ., data = train_UNIF)
#X_train_df<- as.data.frame(X_train)
#Y_train_hat_UNIF <- predict(model_UNIF, newdata = X_train_df)
beta_UNIF = solve(t(X_UNIF)%*%X_UNIF)%*%t(X_UNIF)%*% Y_UNIF
Y_train_hat_UNIF <- X_train%*%beta_UNIF
########## GMS ########
# 计算梯度
G <- gradient(X_train, Y_train, Y_train_hat_UNIF, p_val)$G
GMS_result <- GMS(X_train, Y_train, G, 1,burn, n_val)
X_GMS <- GMS_result$X_selected
Y_GMS <- GMS_result$Y_selected
#train_GMS <- data.frame(Y = Y_GMS,  X_GMS )
#model_GMS <- lm(Y ~ ., data = train_GMS)
beta_GMS = solve(t(X_GMS)%*%X_GMS)%*%t(X_GMS)%*% Y_GMS
########## LGS  #############
iter <- 0
diff_norm <- Inf
max_iter <- 15
tolerance <- 0.001
Y_hat_prev_LGS <-Y_train_hat_UNIF# 上一次迭代的系数估计值
beta_former <- beta_UNIF # 初始化 beta_former
nu <- rep(0, p_val)
Gradient_result <- gradient( X_train, Y_train, Y_hat_prev_LGS, p_val)
G<-Gradient_result$G
while(iter < max_iter & diff_norm > tolerance) {
iter <- iter + 1
# LGS
LGS_result <- LGS(X_train, Y_train, G, 0,  n_val,N_train)
X_LGS <- LGS_result$X_selected
Y_LGS <- LGS_result$Y_selected
#train_LGS <- data.frame(Y = Y_LGS,  X_LGS )
#model_LGS<- lm(Y ~ ., data = train_LGS)
#Y_hat_new_LGS<-predict(model_LGS, newdata = X_train_df)
beta.hat = solve( t( X_LGS)%*% X_LGS/n_val + diag(nu) ) %*%
( t( X_LGS)%*% Y_LGS/n_val + diag(nu)%*%beta_former )
Y_hat_new_LGS<-X_train%*%beta.hat
#更新梯度矩阵
Gradient_result <- gradient(X_train, Y_train, Y_hat_new_LGS, p_val)
G<-Gradient_result$G
G_matrix<-Gradient_result$G_matrix
#求梯度矩阵的均值
mu <- G_matrix |> apply(2, mean)
#更新nu
nu <- c * (iter / log(1+abs(mu)))^d
# 计算差的二范数
diff_norm <- mean((Y_hat_prev_LGS - Y_hat_new_LGS)^2)
# 更新估计值
Y_hat_prev_LGS <-Y_hat_new_LGS
beta_former = beta.hat
}
#核方法下的均匀抽样
Y_test_hat_UNIF_kernel<-predict(fit_final_f0, newdata = X_test)$fit
mse_UNIF_kernel  <- mean((Y_test-Y_test_hat_UNIF_kernel)^2)
rmse_UNIF_kernel <-sqrt(mse_UNIF_kernel)
#核方法2014
Y_test_hat_L2MS<-predict(fit_final_L2MS, newdata = X_test)$fit
mse_L2MS  <- mean((Y_test-Y_test_hat_L2MS)^2)
rmse_L2MS  <- sqrt( mse_L2MS)
# 迭代停止计算我们方法测试集的MSE
Y_test_hat_LGMS<-predict(fit_final_KRMS, newdata = X_test)$fit
mse_LGMS  <- mean((Y_test-Y_test_hat_LGMS)^2)
rmse_LGMS<-sqrt(mse_LGMS)
##############线性模型下的#########
#X_test_df<-as.data.frame(X_test)
#均匀抽样在测试集上的MSE
#Y_test_hat_UNIF<-predict(model_UNIF, newdata = X_test_df)
#mse_UNIF<-mean((Y_test-Y_test_hat_UNIF)^2)
#rmse_UNIF<-sqrt(mse_UNIF)
#GMS在测试集上的MSE
#Y_test_hat_GMS<-predict(model_GMS, newdata = X_test_df)
#mse_GMS  <- mean((Y_test-Y_test_hat_GMS)^2)
#rmse_GMS<-sqrt(mse_GMS)
#LGS在测试集上的MSE
#Y_test_hat_LGS<-predict(model_LGS, newdata = X_test_df)
#mse_LGS  <- mean((Y_test-Y_test_hat_LGS)^2)
#rmse_LGS<-sqrt(mse_LGS)
#均匀抽样在测试集上的MSE
Y_test_hat_UNIF<-X_test%*%beta_UNIF
mse_UNIF<-mean((Y_test-Y_test_hat_UNIF)^2)
rmse_UNIF<-sqrt(mse_UNIF)
#GMS在测试集上的MSE
Y_test_hat_GMS<-X_test%*%beta_GMS
mse_GMS  <- mean((Y_test-Y_test_hat_GMS)^2)
rmse_GMS<-sqrt(mse_GMS)
#LGS在测试集上的MSE
Y_test_hat_LGS<-X_test%*%beta_LGS
mse_LGS  <- mean((Y_test-Y_test_hat_LGS)^2)
rmse_LGS<-sqrt(mse_LGS)
errors_current_combination$rmse_UNIF_kernel[i] <-rmse_UNIF_kernel
errors_current_combination$rmse_L2MS[i]  <-rmse_L2MS
errors_current_combination$rmse_KRMS[i]  <-rmse_KRMS
errors_current_combination$rmse_UNIF[i] <- rmse_UNIF
errors_current_combination$rmse_GMS[i]  <- rmse_GMS
errors_current_combination$rmse_LGS[i]  <- rmse_LGS
cat(sprintf("Finished iteration %d for q=%.1f, n_val=%d\n", i, q, n_val))
}
# MODIFICATION: 将此 (q, n_val) 组合的错误数据框存储到列表中
all_runs_error_list[[paste0("q", q, "_n", n_val)]] <- errors_current_combination
# MODIFICATION: 计算并存储此 (q, n_val) 组合的摘要统计信息
mean_errors_current <- colMeans(errors_current_combination[, -(1:3)]) # Exclude q, n, iter cols
sd_errors_current <- apply(errors_current_combination[, -(1:3)], 2, sd)
summary_stats_current <- data.frame(
q_value = q,
n_value = n_val,
Method = names(mean_errors_current),
Mean_RMSE = mean_errors_current,
SD_RMSE = sd_errors_current,
row.names = NULL
)
all_summary_stats_list[[paste0("q_summary", q, "_n_summary", n_val)]] <- summary_stats_current
cat(sprintf("\n--- Summary for q = %.1f, n_val = %d ---\n", q, n_val))
print(summary_stats_current)
} # End of n_val loop
} # End of q loop
# --- Dependencies ------------------------------------------------------------
# Load order is kept as-is: packages loaded later mask earlier ones (e.g.
# tidyverse masks some MASS/stats functions), so do not reorder these calls.
library(MASS)
library(glmnet)
library(Matrix)
library(foreach)
library(doParallel)
library(doRNG)
library(kernlab)    # rbfdot(), kernelMatrix()
library(KRLS)       # krls()
library(caret)
library(tidyverse)
library(lubridate)  # make_datetime()
# NOTE(review): the hard-coded setwd() makes this script machine-specific;
# consider running from the project root (or here::here()) instead.
setwd("~/Desktop/Program/R_project/Markov_subsampling/kernel/realdata2")
# Project-local helpers: LGMS(), GMS(), LGS(), gradient(), ... — TODO confirm
# against function.R.
source("function.R")
set.seed(521)
# --- Experiment configuration ------------------------------------------------
burn <- 500                      # burn-in length for the Markov samplers
q_values <- c(0, 0.1, 0.2, 0.3)  # corruption fractions to test
n_vals <- c(500, 1000, 1500)     # subsample sizes to test
d <- 1                           # LGS exponent parameter
a <- 1                           # LGS parameters (not referenced below)
b <- 1
# NOTE(review): `c` shadows base::c() as a *value*. Calls like c(...) still
# resolve to the function (R skips non-function bindings in call position),
# but consider renaming to e.g. c_lgs for clarity.
c <- 0.01                        # LGS step-size parameter
M <- 1                           # Monte Carlo replications per (q, n) pair
# --- Data: UCI Air Quality ---------------------------------------------------
# Column names for the headerless CSV (order must match the file).
col_names <- c(
"Month", "Day", "Year", "Hour",
"CO_GT", "PT08_S1", "NMHC_GT", "C6H6_GT", "PT08_S2",
"NOx_GT", "PT08_S3", "NO2_GT", "PT08_S4", "PT08_S5",
"Temperature", "RelativeHumidity", "AbsoluteHumidity",
"Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"
)
raw <- read_csv("AirQuality_clean.csv", col_names = FALSE, col_types = cols())
colnames(raw) <- col_names
# Basic cleaning & timestamp construction
air <- raw %>%
mutate(across(everything(), ~ na_if(.x, -200))) %>%   # replace sentinel -200 with NA
mutate(Datetime = make_datetime(Year, Month, Day, Hour)) %>%
relocate(Datetime)                                     # move Datetime to front
# Response variable for all models below
target_pollutant <- "NO2_GT"
# Cyclical encodings of hour-of-day and month-of-year.
# NOTE(review): these engineered columns are not listed in selected_features
# below, so they are currently unused — confirm whether that is intended.
air <- air %>%
mutate(
Hour_sin   = sin(2 * pi * Hour   / 24),
Hour_cos   = cos(2 * pi * Hour   / 24),
Month_sin  = sin(2 * pi * Month  / 12),
Month_cos  = cos(2 * pi * Month  / 12)
)
# Keep only rows with an observed response
air_model <- air %>% drop_na(all_of(target_pollutant))
# Predictor columns used by every method
selected_features <- c(
"Month", "Hour",
"PT08_S1", "PT08_S2", "PT08_S3", "PT08_S4", "PT08_S5",
"Temperature", "RelativeHumidity", "AbsoluteHumidity"
)
feature_cols <- selected_features
# Accumulators: per-replication errors and per-(q, n) summaries
all_runs_error_list <- list()
all_summary_stats_list <- list()
# --- Main experiment: sweep corruption fraction q and subsample size n -------
for (q in q_values) {
for (n_val in n_vals) {
# One row per Monte Carlo replication; RMSE columns filled in below.
errors_current_combination <- data.frame(
q_value = rep(q, M),
n_value = rep(n_val, M),
iteration = 1:M,
rmse_UNIF_kernel= numeric(M) ,
rmse_L2MS= numeric(M),
rmse_KRMS= numeric(M),
rmse_UNIF = numeric(M),
rmse_GMS = numeric(M),
rmse_LGS = numeric(M))
for (i in 1:M) {
# Per-replication seed so each (q, n) combination reuses the same splits
set.seed(i)
# Split into training (70%) and test (30%) sets
# NOTE(review): the original comment said 80/20 but the code uses 0.7.
N<-nrow(air_model )
train_index <- sample(1:N, size = 0.7 * N)
train_data <- air_model [train_index, ]
test_data <- air_model [-train_index, ]
X_train <- data.matrix(subset(train_data,select=feature_cols ))
Y_train <- data.matrix(subset(train_data,select=target_pollutant))
X_test <- data.matrix(subset(test_data,select=feature_cols ))
Y_test <- data.matrix(subset(test_data,select=target_pollutant))
# Standardization: statistics computed on the training set only
train_mean <- apply(X_train, 2, mean)
train_sd <- apply(X_train, 2, sd)
# Standardize the training set
X_train<- scale(X_train, center = train_mean, scale = train_sd)
# Standardize the test set with the TRAINING mean/sd (no leakage)
X_test<- scale(X_test, center = train_mean, scale = train_sd)
Y_mean <- mean(Y_train)
Y_sd   <- sd(Y_train)
Y_train <- (Y_train - Y_mean) / Y_sd        # standardized training labels
Y_test  <- (Y_test  - Y_mean) / Y_sd        # test labels on the same scale
p_val<-ncol(X_train)
N_train<- nrow(X_train)
# Corruption containers: W replaces predictor rows, O replaces responses
W <- matrix(0, nrow = N_train, ncol = p_val)
O<-matrix(0,nrow=N_train,ncol=1)
# Number of training rows to corrupt (fraction q)
num_filled_rows <- ceiling(q * N_train)
filled_rows <- sample(1:N_train, num_filled_rows)
# Inject corruption: Gaussian noise far from the standardized data
# (X ~ N(-10, 3), Y ~ N(-3, 3)) on the selected rows.
W[filled_rows, ] <- matrix(rnorm(num_filled_rows * p_val, mean=-10,sd=sqrt(3)), nrow = num_filled_rows, ncol = p_val)
O[filled_rows,]<- matrix(rnorm(num_filled_rows*1,mean=-3,sd=sqrt(3)), nrow = num_filled_rows, ncol = 1)
X_train[filled_rows,]<-W[filled_rows, ]
Y_train[filled_rows,]<-O[filled_rows,]
# Gaussian RBF kernel used throughout (fixed bandwidth; a sigest()-based
# bandwidth was explored previously and abandoned).
rbf_kernel <- rbfdot(sigma = 0.01)
####################### Uniform subsampling (kernel baseline) #################
idx <- sample(1:N_train, n_val, replace = FALSE)
# Select predictor and response rows with the same indices
X_UNIF <- X_train[idx, ]
Y_UNIF <- Y_train[idx, ]
# Fit the baseline KRLS model on the uniform subsample
fit_final_f0 <- krls(X = X_UNIF, y = Y_UNIF, derivative = FALSE, vcov = TRUE)
Yhat_train <- predict(fit_final_f0, newdata = X_train)$fit
# Absolute residuals of the baseline fit on the full training set
# (removed: res0_2 <- res0^2 was assigned but never read anywhere below)
res0 <- abs(Y_train - Yhat_train)
# Kernel matrix of the full training set against the uniform subsample,
# and per-row L2 norms used to scale the KRMS sampling scores.
K_train <- kernelMatrix(rbf_kernel, X_train, X_UNIF)
K_norms_train <- sqrt(rowSums(K_train^2))
rm(K_train) # free the (N_train x n_val) kernel matrix
######################### 2014 kernel method (L2MS) ###########################
# Sampling scores: exponentiated squared residuals of the baseline fit.
G<-exp(res0^2)
# NOTE(review): this step originally sat inside `while (iter < max_iter)` with
# max_iter = 1, i.e. it always ran exactly once; the dead loop scaffolding
# (max_iter / tolerance / iter) has been removed. Those variables are
# re-initialized before their next use in the KRMS section below.
# Markov subsampling driven by G
L2MS_result<-LGMS(X_train,Y_train, G, 1, burn, n_val)
X_L2MS<-L2MS_result$X_selected
Y_L2MS<-L2MS_result$Y_selected
indices_L2MS<-L2MS_result$indices
# Fit the final L2MS kernel model
fit_final_L2MS <- krls(X = X_L2MS, y = Y_L2MS,derivative=FALSE,vcov= TRUE)
# Fraction of the subsample that avoided the corrupted rows
# (ps_L2MS = selected indices that ARE corrupted)
ps_L2MS <- intersect(indices_L2MS, filled_rows )
psr_L2MS <- 1-length(ps_L2MS)/n_val
############################## Our method (KRMS) ##############################
# Sampling scores: baseline residuals scaled by per-row kernel norms.
G_KRMS <- res0 / K_norms_train
max_iter <- 15     # maximum number of refinement iterations
tolerance <- 0.001 # convergence threshold on mean squared prediction change
iter <- 0
diff_norm <- Inf
# BUGFIX: initialize with the baseline model instead of NULL so that a failure
# in the very first iteration still leaves a usable model for the test-set
# predictions below (the original comments promised this fallback but the code
# left fit_final_KRMS as NULL in that case).
fit_final_KRMS <- fit_final_f0
while (iter < max_iter && diff_norm > tolerance) {
iter <- iter + 1
# Wrap sampling + fitting so any failure (e.g. a constant response in the
# subsample) falls back to the previous successful model.
possible_outcome <- tryCatch({
# 1. Draw a subsample with the Markov sampler using the current scores
KRMS_result <- LGMS(X_train, Y_train, G_KRMS, 1, burn, n_val)
X_KRMS <- KRMS_result$X_selected
Y_KRMS <- KRMS_result$Y_selected
# 2. krls() cannot fit a constant response; raise a tagged error instead
if (length(unique(Y_KRMS)) < 2) {
warning(sprintf("KRMS (Our Method) iter %d for q=%.1f, n_val=%d: Y_KRMS is constant. Will use previous estimate.", iter, q, n_val))
stop("Y_KRMS_is_constant_in_iter") # tag caught by the error handler below
}
# 3. Fit the kernel model on the subsample
current_fit_KRMS <- krls(X = X_KRMS, y = Y_KRMS, derivative=FALSE, vcov=FALSE)
# 4. Success: return the model together with the subsample used
list(success = TRUE, fit = current_fit_KRMS, X_subsample = X_KRMS)
}, error = function(e) {
# 5. Any error (including the tagged constant-Y one) ends the refinement
warning(sprintf("Error or Y_KRMS constant in KRMS (Our Method) iter %d for q=%.1f, n_val=%d: %s. Using previous estimate for KRMS method.", iter, q, n_val, e$message))
list(success = FALSE, error_message = e$message)
})
if (possible_outcome$success) {
fit_final_KRMS <- possible_outcome$fit       # keep the newest model
X_KRMS <- possible_outcome$X_subsample
# Recompute per-row kernel norms against the new subsample
K_train <- kernelMatrix(rbf_kernel, X_train, X_KRMS)
K_norms_train <- sqrt(rowSums(K_train^2))
K_norms_train[K_norms_train == 0] <- 1e-9 # guard against division by zero
rm(K_train)
Yhat_train_new <- predict(fit_final_KRMS, newdata = X_train)$fit
res <- abs(Y_train - Yhat_train_new)
# BUGFIX: the updated scores were previously stored in an unused variable
# (G_LGMS), so every iteration resampled with the stale initial scores.
G_KRMS <- res / K_norms_train
# Convergence measure: current model vs the INITIAL uniform-subsample
# model on the training set (per the original design note, Yhat_train is
# deliberately not updated — TODO confirm this is intended).
diff_norm <- mean((Yhat_train - Yhat_train_new)^2)
} else {
# Failure: keep the previous successful model and stop refining
cat(sprintf("KRMS (Our Method) while loop stopped at iteration %d for q=%.1f, n_val=%d due to error/constant Y. Final LGMS model is from previous successful state.\n", iter, q, n_val))
break
}
}
#################### Linear methods ####################
# OLS on the uniform subsample gives the initial linear fit.
# Uses solve(A, b) on the normal equations instead of an explicit inverse
# (solve(t(X)%*%X) %*% ...), which is the numerically preferred form.
beta_UNIF <- solve(crossprod(X_UNIF), crossprod(X_UNIF, Y_UNIF))
Y_train_hat_UNIF <- X_train %*% beta_UNIF
########## GMS ##########
# Gradient-based sampling scores from the current linear fit
G <- gradient(X_train, Y_train, Y_train_hat_UNIF, p_val)$G
GMS_result <- GMS(X_train, Y_train, G, 1, burn, n_val)
X_GMS <- GMS_result$X_selected
Y_GMS <- GMS_result$Y_selected
# OLS refit on the GMS subsample
beta_GMS <- solve(crossprod(X_GMS), crossprod(X_GMS, Y_GMS))
########## LGS (iterative, ridge-anchored to the previous estimate) ##########
iter <- 0
diff_norm <- Inf
max_iter <- 15
tolerance <- 0.001
Y_hat_prev_LGS <-Y_train_hat_UNIF# fitted values from the previous iteration
beta_former <- beta_UNIF # anchor coefficients for the ridge-style penalty
nu <- rep(0, p_val)      # per-coordinate penalty weights (zero on first pass)
# Initial gradient-based scores from the uniform-subsample fit
Gradient_result <- gradient( X_train, Y_train, Y_hat_prev_LGS, p_val)
G<-Gradient_result$G
while(iter < max_iter & diff_norm > tolerance) {
iter <- iter + 1
# Draw a subsample with LGS using the current scores
LGS_result <- LGS(X_train, Y_train, G, 0,  n_val,N_train)
X_LGS <- LGS_result$X_selected
Y_LGS <- LGS_result$Y_selected
#train_LGS <- data.frame(Y = Y_LGS,  X_LGS )
#model_LGS<- lm(Y ~ ., data = train_LGS)
#Y_hat_new_LGS<-predict(model_LGS, newdata = X_train_df)
# Penalized least squares on the subsample, shrunk toward beta_former:
# (X'X/n + diag(nu))^{-1} (X'Y/n + diag(nu) beta_former)
beta.hat = solve( t( X_LGS)%*% X_LGS/n_val + diag(nu) ) %*%
( t( X_LGS)%*% Y_LGS/n_val + diag(nu)%*%beta_former )
Y_hat_new_LGS<-X_train%*%beta.hat
# Update the gradient scores from the new fit
Gradient_result <- gradient(X_train, Y_train, Y_hat_new_LGS, p_val)
G<-Gradient_result$G
G_matrix<-Gradient_result$G_matrix
# Column means of the gradient matrix
mu <- G_matrix |> apply(2, mean)
# Update the penalty weights using the GLOBAL tuning constants c and d.
# NOTE(review): if any mean gradient mu[j] is ~0, log(1+|mu[j]|) -> 0 and
# nu[j] -> Inf; confirm this cannot occur (or guard it) before production use.
nu <- c * (iter / log(1+abs(mu)))^d
# Mean squared change of the training fit between iterations
diff_norm <- mean((Y_hat_prev_LGS - Y_hat_new_LGS)^2)
# Roll state forward for the next iteration
Y_hat_prev_LGS <-Y_hat_new_LGS
beta_former = beta.hat
}
# Final LGS coefficients used for test-set evaluation
beta_LGS<-beta.hat
# --- Test-set evaluation -----------------------------------------------------
# Kernel baseline: uniform subsample
Y_test_hat_UNIF_kernel <- predict(fit_final_f0, newdata = X_test)$fit
mse_UNIF_kernel <- mean((Y_test - Y_test_hat_UNIF_kernel)^2)
rmse_UNIF_kernel <- sqrt(mse_UNIF_kernel)
# Kernel 2014 method (L2MS)
Y_test_hat_L2MS <- predict(fit_final_L2MS, newdata = X_test)$fit
mse_L2MS <- mean((Y_test - Y_test_hat_L2MS)^2)
rmse_L2MS <- sqrt(mse_L2MS)
# Our kernel method (KRMS).
# BUGFIX: these three were previously named *_LGMS while the storage line
# below read the undefined `rmse_KRMS`, which aborted every run with an
# object-not-found error; the names are now consistent.
Y_test_hat_KRMS <- predict(fit_final_KRMS, newdata = X_test)$fit
mse_KRMS <- mean((Y_test - Y_test_hat_KRMS)^2)
rmse_KRMS <- sqrt(mse_KRMS)
############## Linear models ##############
# OLS on the uniform subsample
Y_test_hat_UNIF <- X_test %*% beta_UNIF
mse_UNIF <- mean((Y_test - Y_test_hat_UNIF)^2)
rmse_UNIF <- sqrt(mse_UNIF)
# GMS
Y_test_hat_GMS <- X_test %*% beta_GMS
mse_GMS <- mean((Y_test - Y_test_hat_GMS)^2)
rmse_GMS <- sqrt(mse_GMS)
# LGS
Y_test_hat_LGS <- X_test %*% beta_LGS
mse_LGS <- mean((Y_test - Y_test_hat_LGS)^2)
rmse_LGS <- sqrt(mse_LGS)
# Store this replication's RMSEs
errors_current_combination$rmse_UNIF_kernel[i] <- rmse_UNIF_kernel
errors_current_combination$rmse_L2MS[i] <- rmse_L2MS
errors_current_combination$rmse_KRMS[i] <- rmse_KRMS
errors_current_combination$rmse_UNIF[i] <- rmse_UNIF
errors_current_combination$rmse_GMS[i] <- rmse_GMS
errors_current_combination$rmse_LGS[i] <- rmse_LGS
cat(sprintf("Finished iteration %d for q=%.1f, n_val=%d\n", i, q, n_val))
}
# Store this (q, n_val) combination's per-replication error data frame
all_runs_error_list[[paste0("q", q, "_n", n_val)]] <- errors_current_combination
# Compute and store summary statistics for this (q, n_val) combination
mean_errors_current <- colMeans(errors_current_combination[, -(1:3)]) # Exclude q, n, iter cols
sd_errors_current <- apply(errors_current_combination[, -(1:3)], 2, sd)
summary_stats_current <- data.frame(
q_value = q,
n_value = n_val,
Method = names(mean_errors_current),
Mean_RMSE = mean_errors_current,
SD_RMSE = sd_errors_current,
row.names = NULL
)
all_summary_stats_list[[paste0("q_summary", q, "_n_summary", n_val)]] <- summary_stats_current
cat(sprintf("\n--- Summary for q = %.1f, n_val = %d ---\n", q, n_val))
print(summary_stats_current)
} # End of n_val loop
} # End of q loop
