EM_ridge_svd = function(formula, data, e = 1e-5, prior, model = "gaussian", numeric_optim = F, bign = F){
  
  rv = list()
  
  # -------------------------------------------------------------------
  # Process and set up the data from the model formula
  rv$terms <- stats::terms(x = formula, data = data)
  
  mf = stats::model.frame(formula = formula, data = data)
  rv$target.var = names(mf)[1]
  
  y = mf[,1]
  X = stats::model.matrix(formula, data=data)
  
  # Convert to a numeric matrix and drop the target variable
  X = as.matrix(X)
  X = X[,-1,drop=FALSE]
  
  n = nrow(X)
  p = ncol(X)
  
  std.X  = my.standardise(X, length = "n")
  I.keep = std.X$std.X!=0
  X      = std.X$X[,I.keep]
  
  y.mu  = 0
  y.sd  = 1
  if (model == "gaussian")
  {
    y.mu  = mean(y)
    y     = y - y.mu
    y.sd  = sqrt( mean( (y-mean(y))^2 ) )
    y     = y / y.sd
  }
  
  start_time_preprocessing <- Sys.time()
  if(bign & n>p){
    
    xtx = crossprod(X)
    xty = crossprod(X,y)
    
    eig = eigen(xtx, symmetric = T)
    
    v = eig$vectors
    #s2 = abs(eig$values)
    s2 = eig$values
    c = t(v)%*%xty # faster than crossprod
    
  }else{
    
    usv = svd(X)
    u = usv$u
    s = usv$d
    v = usv$v
    
    c = (t(u)%*%y)*s
    s2 = s^2
    
  }
  
  b = c/s2
  EB2 = sum(b^2)

  #yty = crossprod(y) # apparently crossprod slower? but probably not significant
  yty = t(y) %*% y
  
  end_time_preprocessing <- Sys.time()
  
  rv$preprocessing_time = end_time_preprocessing - start_time_preprocessing
  
  sigma2 = var(y)
  tau2 = 1
  
  RSS = 1e10
  i = 0
  
  #EM loop
  
  while (T) {
    
    beta.mu.old = b
    RSS.old = RSS
    b = c/(s2 + 1/tau2)
    Eb2 = b^2
    
    #ERSS
    RSS = (yty - 2*t(b)%*%c + t(Eb2)%*%s2)
    DOF = sum(s2/(s2 + 1/tau2))*sigma2 # trace((X'*X)*inv(X'*X+eye(p)/tau2))
    ERSS = RSS + DOF
    
    
    # E[sum(b^2)]
    Vb2 = sigma2*sum(1/(s2+1/tau2)) + sigma2*max(p-n,0)*tau2; # trace( sigma2*inv(X'*X+eye(p)/tau2) )
    EB2 = sum(Eb2) + Vb2
    
    if(!numeric_optim){
      
      if(prior == "tau"){
        
        tau2 = c((sqrt((EB2^2*n^2) + (ERSS^2*p^2) + 2*EB2*ERSS*(8 + 4*n + 4*p + n*p)) + EB2*n -ERSS*p)/(2*ERSS*(2+p)))
        
        sigma2 = c((ERSS*tau2 + EB2)/((n+p+2)*tau2))
        
      }else if(prior == "tau2"){
        
        tau2 = c((EB2*(-1 + n) - ERSS*(1 + p) + sqrt(4*EB2*ERSS*(1 + n)*(3 + p) + (EB2 + ERSS - EB2*n + ERSS*p)^2))/(2*ERSS*(3 + p)))
        sigma2 = c((ERSS*tau2 + EB2)/((n+p+2)*tau2))
        
      }
      
    }else{
      
      rv = find.tau2.sigma2.optim(EB2, n, ERSS, tau2, sigma2, prior, p)
      
      tau2 = rv$tau2
      sigma2 = rv$sigma2
      
    }
    
    
    # Termination conditions
    delta = sum(abs(RSS-RSS.old)) / (1+sum(abs((RSS))))
    if ((delta < e)|| i>1e4)
    {
      break
    }
    
    i = i+1
    
  }
  
  b = c/(s2 + 1/tau2)
  ## Transform back and unstandardise
  b0 = mean(y) + y.mu
  b = v%*%b
  
  rv$beta = as.matrix(apply(t(b), 1, function(x)(x / std.X$std.X[I.keep] * y.sd)))
  rv$beta0 = b0 - std.X$mean.X[I.keep] %*% rv$beta
  rv$num.iter = i
  rv$tau2 = tau2
  rv$sigma2 = sigma2*(y.sd^2)
  rv$model = model
  rv$I.keep = I.keep
  
  return(rv)
}

find.tau2.sigma2.optim <- function(Eb2,n,E.RSS,tau2=1, sigma2, prior, p)
{
  
  min.par = exp((optim( log(c(tau2,sigma2)), function(x){ tau2.sigma2.optim(x, Eb2, n, E.RSS, prior, p) }, method = "BFGS"))$par)
  
  tau2 = min.par[1]
  sigma2 = min.par[2]
  
  return(list(tau2=tau2,sigma2=sigma2))  
}


tau2.sigma2.optim <- function(x, Eb2, n, Erss, prior, p)
{
  tau2 = exp(x[1])
  sigma2 = exp(x[2])
  
  f = p/2*log(tau2) + 1/2/sigma2/tau2*Eb2 # beta 
  f = f + (n+p)/2*log(sigma2) + Erss/2/sigma2 + log(sigma2) # data model
  
  if(prior %in% c("tau")){
    
    f = f + log(1+tau2)
    
  }else if (prior %in% c("tau2")){
    
    f = f + (1/2)*log(tau2) + log(1+tau2)
    
  }
  
  f
}

fastLOO <- function(formula, data, lambda = NULL, glm_lambda = F, standardize = T, bign = F){
  
  rv = list()
  
  # -------------------------------------------------------------------
  # Process and set up the data from the model formula
  rv$terms <- stats::terms(x = formula, data = data)
  
  mf = stats::model.frame(formula = formula, data = data)
  rv$target.var = names(mf)[1]
  
  y = mf[,1]
  X = stats::model.matrix(formula, data=data)
  
  # Convert to a numeric matrix and drop the target variable
  X = as.matrix(X)
  X = X[,-1,drop=FALSE]
  
  n = nrow(X)
  p = ncol(X)
  
  if(glm_lambda){
    
    sd_y = c(sqrt(var(y)*(n-1)/n))
    glmlam = lambda
    lambda = (lambda*n/sd_y)
    lambda[1] = lambda[1]*2e32
    
  }
  
  # Find lambda sequence(if not given)
  if(is.null(lambda)){
    glm_lambda = T
    
    sd_y = c(sqrt(var(y)*(n-1)/n))
    ridge_glm = glmnet(x = X, y = y, family = "gaussian", alpha = 0, thresh = 1e-31, standardize = standardize)
    glmlam = ridge_glm$lambda
    lambda = (ridge_glm$lambda*n/sd_y)
    #lambda = glmlam
    lambda[1] = lambda[1]*2e32
  }
  
  lambda_len = length(lambda)
  LOO_score = matrix(NA, lambda_len)
  
  std.X  = my.standardise(X, length = "n")
  I.keep = std.X$std.X!=0

  if(standardize){
    
    X      = std.X$X[,I.keep]
    
    y.mu  = mean(y)
    y     = y - y.mu
    y.sd  = sqrt( mean( (y-mean(y))^2 ) )
    y     = y / y.sd
    
  }
  
  start_time_preprocessing <- Sys.time()
  if(bign & n>p){
    
    xtx = crossprod(X)
    eig = eigen(xtx, symmetric = T)

    v = eig$vectors
    #s2 = pmax(eig$values, 1e-32)
    s2 = eig$values
    s = sqrt(s2)
    R = X%*%v
    u = (R * rep(1/s, rep(nrow(R), length(s))))

    #s2[which(s2 == 1e-32)] = 0

    c = (t(R)%*%y)
    
    # usv = svd(xtx)
    # s2 = usv$d
    # v = usv$v
    # s = sqrt(s2)
    # 
    # R = X%*%v
    # u = (R * rep(1/s, rep(nrow(R), length(s))))
    # 
    # c = (t(R)%*%y)
    
  }else{
    
    usv = svd(X)
    u = usv$u
    s = usv$d
    v = usv$v
    
    s2 = s^2
    R = u * rep(s, rep(nrow(u), length(s)))
    
    c = (t(u)%*%y)*s
  }
  
  end_time_preprocessing <- Sys.time()
  
  rv$preprocessing_time = end_time_preprocessing - start_time_preprocessing
  
  
  for(i in 1:lambda_len){
    
    lam = lambda[i]
    
    W = s2/(s2+lam)

    uW = u * rep(W, rep(nrow(u), length(W)))
    hat_svd_diag = rowSums(uW*u)
    
    alpha = c/(s2 + lam)
    e = y - R%*%alpha
    
    LOO_score[i] = mean( (e/(1-hat_svd_diag))^2 )
    
  }
  
  which.lambda = which.min(LOO_score)
  lambda_min = lambda[which.lambda]

  ## Transform back and unstandardise
  b0 = mean(y)
  b = v%*%(c/(s2 + lambda_min))
  
  if(standardize){
    
    b0 = b0 + y.mu
    rv$beta = as.matrix(apply(t(b), 1, function(x)(x / std.X$std.X[I.keep] * y.sd)))
    rv$beta0 = b0 - std.X$mean.X[I.keep] %*% rv$beta
    
  }else{
    
    rv$beta = b
    rv$beta0 = b0
    
  }
  
  rv$model = "gaussian"
  rv$I.keep = I.keep
  
  if(glm_lambda){
    
    rv$glm.lambda.min = glmlam[which.lambda]
    rv$lambda.min = lambda[which.lambda]
    rv$glm.lambda = glmlam
    rv$lambda = lambda
    
  }else{
    
    rv$lambda.min = lambda[which.lambda]
    rv$lambda = lambda
    
  }
  
  
  rv$errors = LOO_score
  rv$num.iter = lambda_len
  
  return(rv)
  
}



# ============================================================================================================================
my.standardise <- function(X, length = "n")
{
  n = nrow(X)
  p = ncol(X)
  
  # 
  r       = list()
  r$X     = X
  if (p > 1)
  {
    r$mean.X = colMeans(X)
  } else
  {
    r$mean.X = mean(X)
  }
  
  if(length == "n"){
    r$std.X  = sqrt( t(apply(X,2,stats::sd))^2*(n-1)/n ) # SSE length n
  }else{
    r$std.X  = t(apply(X,2,stats::sd)) * sqrt(n-1) # SSE length 1
  }
  
  
  # Perform the standardisation
  if (p == 1)
  {
    r$X <- as.matrix(apply(X,1,function(x)(x - r$mean.X)))
    r$X <- as.matrix(apply(r$X,1,function(x)(x / r$std.X)))
  } else
  {
    r$X <- t(as.matrix(apply(X,1,function(x)(x - r$mean.X))))
    r$X <- t(as.matrix(apply(r$X,1,function(x)(x / r$std.X))))
  }
  
  return(r)
}


em.predict <- function(object, newdata)
{
  # Build the fully specified formula using the covariates that were fitted
  f <- stats::as.formula(paste("~",paste(attr(object$terms,"term.labels"),collapse="+")))
  
  # Extract the design matrix
  X = stats::model.matrix(f, data=newdata)
  X = as.matrix(X[,-1, drop = F])
  X = X[,object$I.keep]
  n = nrow(X)
  p = ncol(X)
  
  # Make predictions
  yp = X %*% object$beta + as.numeric(object$beta0)
  
  # If GLM
  if (object$model == "binomial")
  {
    yp = 1/(1+exp(-yp))
  }
  else if (object$model == "poisson")
  {
    yp = exp(yp)
  }
  
  yp
}


