# Cross-validation procedure for choosing among candidate predictors.
#
# The first n_valid * Kfold rows of the training data, where
# n_valid = floor(nrow(X.train) / Kfold), are split into Kfold contiguous
# blocks of n_valid rows. Each block serves once as the validation fold while
# the remaining rows (including any leftover rows at the end) form the fitting
# set. Fitting and validation sets are each centred by their OWN column means /
# response mean. For every fold the absolute prediction error of ten candidate
# predictions is recorded per validation row:
#   columns 1-7  : predictions returned by fun_proj (seven are expected, since
#                  ten columns in total are hard-coded below),
#   column  8    : lasso   (cv.glmnet, alpha = 1, lambda.min),
#   column  9    : ridge   (cv.glmnet, alpha = 0, lambda.min),
#   column  10   : ridgeless (pseudo-inverse least squares).
#
# Arguments:
#   X.train, Y.train : training data (X.train is indexed as a matrix with one
#                      row per observation; Y.train is indexed as a vector).
#   Kfold     : the number of folds in CV; must satisfy
#               2 <= Kfold <= nrow(X.train).
#   naive     : choose whether to produce the naive estimator
#               (forwarded unchanged to fun_proj).
#   criterion : selection method for the regularization parameter in the
#               transformed model (forwarded unchanged to fun_proj).
#   correct   : bias correction for prediction
#               (forwarded unchanged to fun_proj).
#
# Objects that must exist outside this function: the cluster `cl` used by
# parApply, the function `fun_proj`, and `ginv` (presumably MASS::ginv attached
# by the calling script -- TODO confirm).
#
# Returns a numeric vector of length six:
#   [1] column index in 4:7          with the smallest mean absolute error,
#   [2] column index in c(4,6)       with the smallest mean absolute error,
#   [3] column index in c(1,2,4,5,7) with the smallest mean absolute error,
#   [4] column index in c(4,5,7)     with the smallest mean absolute error,
#   [5] position (1, 2 or 3) of the best column among 8:10,
#   [6] position (1 or 2)    of the best column among 8:9.
fun_cv <- function(X.train, Y.train, Kfold, naive, criterion, correct)
{
  library(glmnet)
  library(parallel)
  library(pls)

  n_train <- nrow(X.train)
  p <- ncol(X.train)

  if (!is.numeric(Kfold) || length(Kfold) != 1 || is.na(Kfold)) {
    stop("Kfold must be a single number", call. = FALSE)
  }
  n_valid <- floor(n_train / Kfold)
  # Kfold = 1 leaves no fitting rows; n_valid = 0 would make the fold index
  # sequence run backwards (1:0) and silently pick the wrong rows.
  if (Kfold < 2 || n_valid < 1) {
    stop("Kfold must be at least 2 and at most nrow(X.train)", call. = FALSE)
  }

  n_fit <- n_train - n_valid   # rows available for fitting in every fold
  n_methods <- 10              # 7 predictions from fun_proj + 3 global fits

  # Only n_valid * Kfold rows are ever validated, so allocate exactly that
  # many; extra all-zero rows would only dilute the column means.
  err.valid <- matrix(0, nrow = n_valid * Kfold, ncol = n_methods)

  # Subtract the column means of a matrix from each of its columns.
  centre_cols <- function(m) sweep(m, 2, colMeans(m))

  # Fit a cross-validated glmnet model on the fitting set and evaluate it on
  # the validation set at lambda.min. Returns the slope vector and the
  # validation predictions (intercept included).
  fit_penalized <- function(x_fit, y_fit, x_valid, alpha) {
    fit <- cv.glmnet(x = x_fit, y = y_fit, family = "gaussian",
                     alpha = alpha, nfolds = 5, standardize = TRUE)
    coefs <- coef(fit, s = fit$lambda.min)[seq_len(p + 1)]
    list(beta = coefs[-1], est = cbind(1, x_valid) %*% coefs)
  }

  for (h in seq_len(Kfold)) {
    J_h <- (n_valid * (h - 1) + 1):(n_valid * h)

    # drop = FALSE keeps matrix shape even when a fold holds a single row.
    x_fit <- centre_cols(X.train[-J_h, , drop = FALSE])
    x_valid <- centre_cols(X.train[J_h, , drop = FALSE])
    y_fit <- Y.train[-J_h]
    y_fit <- y_fit - mean(y_fit)
    y_valid <- Y.train[J_h]
    y_valid <- y_valid - mean(y_valid)

    # Lasso is fitted before ridge so the random fold assignment inside
    # cv.glmnet consumes the RNG stream in the same order as before.
    lasso <- fit_penalized(x_fit, y_fit, x_valid, alpha = 1)
    ridge <- fit_penalized(x_fit, y_fit, x_valid, alpha = 0)

    # Minimum-norm least squares via the pseudo-inverse of X'X.
    beta_ridgeless <- ginv(crossprod(x_fit)) %*% crossprod(x_fit, y_fit)
    est_ridgeless <- x_valid %*% beta_ridgeless

    # One call of fun_proj per validation row, run on the external cluster.
    pred.valid <- parApply(cl = cl, x_valid, 1, fun_proj, n_fit, p,
                           x_fit, y_fit, lasso$beta, ridge$beta,
                           beta_ridgeless, naive, criterion, correct)
    pred.all <- cbind(t(pred.valid), lasso$est, ridge$est, est_ridgeless)

    # A wrong column count could otherwise be recycled silently into
    # err.valid and corrupt the hard-coded column positions used below.
    if (ncol(pred.all) != n_methods) {
      stop("expected ", n_methods, " prediction columns but got ",
           ncol(pred.all), call. = FALSE)
    }

    # y_valid is recycled down each column of pred.all.
    err.valid[J_h, ] <- abs(y_valid - pred.all)
  }

  mean_valid <- colMeans(err.valid)

  # Column index (within `cols`) with the smallest mean absolute error.
  best_col <- function(cols) cols[which.min(mean_valid[cols])]

  valid.loc1 <- best_col(4:7)
  valid.loc2 <- best_col(c(4, 6))
  valid.loc3 <- best_col(c(1, 2, 4, 5, 7))
  valid.loc4 <- best_col(c(4, 5, 7))

  # For the global fits the position within the subset is returned, not the
  # column index.
  id.min.glob1 <- which.min(mean_valid[8:10])
  id.min.glob2 <- which.min(mean_valid[8:9])

  c(valid.loc1, valid.loc2, valid.loc3, valid.loc4, id.min.glob1, id.min.glob2)
}