library('RSpectra')
library("TopicScore")
library('tidyverse')
library('trimcluster')
library('extremefit')
library('igraph')




# generate permutation list of [K]
perm <- function(v) {
  nn <- length(v)
  if (nn == 1) v
  else {
    X <- NULL
    for (i in 1:nn) X <- rbind(X, cbind(v[i], perm(v[-i])))
    X
  }
}
permk <- perm(1:k)



## SCORE+
SCOREplus <- function(A, k, c = 0.1, r = NULL){
  
  
  # if r not give, set to be k+1
  if (is.null(r)){
    fix.latent.dim = F
    r = k + 1
  } else {
    fix.latent.dim = T # otherwise latent dimension is given and fixed
  }
  
  n = nrow(A) # number of nodes
  degrees = rowSums(A)
  delta = c * max( degrees) # tunning parameter for graph laplacian
  d.inv = 1 / sqrt( delta + degrees )
  
  L.delta = t(d.inv * A) * d.inv # graph laplacian with ridge regularization
  
  # get top r eigenvectors
  eig.out = RSpectra::eigs(L.delta, k = r)
  eig.vec.w = eig.out$vectors %*% diag(eig.out$values) # reweight eigenvectors by eigen values
  
  # get ratio matrix 
  ratios = eig.vec.w[,2:r] / eig.vec.w[,1]
  
  if(!fix.latent.dim){
    # decide latent dimension by eigen-gap
    signal.weakness = 1 - eig.out$values[k+1] / eig.out$values[k]
    if ( signal.weakness > 0.1 ){
      ratios = ratios[,1:(k-1)]
    }
  }
  
  
  # k-means
  labels = kmeans(ratios, k, nstart = 100)$cluster
  
  return(list(labels = labels,
              ratios = ratios,
              delta = delta,
              eig.vec = eig.out$vectors,
              eig.val = eig.out$values))
}


cor2 <- function(x, y){
  if(sum(x * y) == 0){
    0
  }
  else{
    sum(x * y) / sqrt(sum(x * x)) / sqrt(sum(y * y))
  }
}


#different settings
distr <- c( 'pareto', 'gamma')
magnitu <- c('sqrt(n)', 'log(n)')
bal <- c('balanced', 'Imbalanced')



#main code
for(di in distr){
  for(ma in magnitu){
    for(ba in bal){
      #Initialization
      set.seed(10007)
      tot_iter <- 100
      n <- 500
      k <- 3
      
      n_L_s <- 1
      n_L_t <- 47
      n_step <- 10
      res_AngleMin <- rep(0, n_L_t) %o% rep(0, tot_iter) #AngleMin
      res_AngleMinPlus <- rep(0, n_L_t) %o% rep(0, tot_iter) #AngleMinPlus
      res_AngleMinSub <- rep(0, n_L_t) %o% rep(0, tot_iter) #AngleMinSub
      res_SNMF <- rep(0, n_L_t) %o% rep(0, tot_iter) #SNMF
      res_u <- rep(0, n_L_t) %o% rep(0, tot_iter) #Unsupervised (SCORE+)
      
      
      #Generating data
      for(iter in c(1:tot_iter)){
        theta <- rep(0, n)
        if(di == 'gamma'){
          theta <-  rgamma(n = n, shape = 3.5)
        }
        else{
          theta <- rpareto(n = n, a = 3.5)
        }
        if(ma == 'sqrt(n)'){
          theta <- sqrt(1 / sqrt(n)) * theta
        }
        else{
          theta <- sqrt(log(n) * log(n) / n) * theta
        }
        
        for(i in c(1:n)){
          theta[i] <- min(theta[i], 1)
        }
        Theta <- diag(theta)
        
        
        P <- matrix(runif(n = k * k, 0 , 1), nrow = k)
        for(i in c(1:k)){
          P[i, i] <- 1
        }
        P <- (P + t(P)) / 2
        
        
        Pi <- matrix(rep(0, n * k), nrow = n)
        for(i in c(1:n)){
          if(ba == 'balanced'){
            Pi[i, ] <- rmultinom(n = 1, size = 1, prob = rep(1 / k, k))
          }
          else{
            Pi[i, ] <- rmultinom(n = 1, size = 1, prob = c(0.2, 0.2, 0.6))
          }
        }
        
        O <- Pi %*% t(Pi)
        
        ED <- Theta %*% Pi %*% P %*% t(Pi) %*% Theta
        D <- matrix(rep(0, n * n), nrow = n)
        for(i in c(2:n)){
          for(j in c(1:(i-1))){
            D[i, j] <- rbinom(n = 1, size = 1, prob = ED[i, j])
          }
        }
        for(i in c(1:(n-1))){
          for(j in c((i+1):n)){
            D[i, j] <- D[j, i]
          }
        }
        
        
        
        #Unsupervised (SCORE+)
        center_comu <- rep(0, nrow(permk))
        clu_res_label <-  SCOREplus(D, k)[[1]]
        for(i in c(1:nrow(permk))){
          for(j in c(1:n)){
            center_comu[i] <- center_comu[i] + 1 - (permk[i, clu_res_label[j]] == which.max(Pi[j, ]))
          }
        }
        
        res_u[, iter] <- rep(min(center_comu) / n, n_L_t - n_L_s + 1)
        
        
        for(n_L_temp in c(n_L_s:n_L_t)){
          n_L <- n_L_temp * n_step
          n_U <- n - n_L
          
          
          #AngleMin
          V_AngleMin <- t(Pi[1:n_L, ]) %*% D[1:n_L, ]
          err_l <- rep(0, n)
          temp <- rep(0, k)
          
          
          for(i in c((n_L + 1):n)){
            if(sum(D[i, ]^2) ==0){
              err_l[i] <- rbinom(n = 1, size = 1, prob = 1 - 1 / k)
            }
            else{
              for(l in c(1:k)){
                temp[l] <- cor2(D[i, ], V_AngleMin[l, ])
              }
              err_l[i] <- 1 - Pi[i, which.max(temp)]
            }
          }
          res_AngleMin[n_L_temp, iter] <- sum(err_l[(n_L + 1):n]) / n_U
          
          
          
          
          #AngleMinSub
          V_AngleMinSub <- t(Pi[1:n_L, ]) %*% D[1:n_L, 1:n_L] %*% Pi[1:n_L, ]
          err_l_1 <- rep(0, n)
          temp_1 <- rep(0, k)
          
          for(i in c((n_L + 1):n)){
            tempi <- D[i, 1:n_L] %*% Pi[1:n_L, ]
            if(sum(tempi^2) ==0){
              err_l_1[i] <- rbinom(n = 1, size = 1, prob = 1 - 1 / k)
            }
            else{
              for(l in c(1:k)){
                temp_1[l] <- cor2(tempi, V_AngleMinSub[l, ])
              }
              err_l_1[i] <- 1 - Pi[i, which.max(temp_1)]
            }
          }
          res_AngleMinSub[n_L_temp, iter] <- sum(err_l_1[(n_L + 1):n]) / n_U
          
          
          
          
          #AngleMinPlus
          temp_res <- SCOREplus(D[(n_L + 1):n, (n_L + 1):n], k)
          clu_res_label <- temp_res[[1]]
          
          Pi_est_AngleMinPlus <- matrix(rep(0, 2 * n * k), nrow = n)
          Pi_est_AngleMinPlus[1:n_L, 1:k ] <- Pi[1:n_L, ]
          for(i in c((n_L + 1):n)){
            Pi_est_AngleMinPlus[i, k +  clu_res_label[i - n_L]] <- 1
          } 
          
          V_AngleMinPlus <- t(Pi_est_AngleMinPlus[1:n_L, ]) %*% D[1:n_L, ] %*%  Pi_est_AngleMinPlus 
          err_AngleMinPlus <- rep(0, n)
          temp <- rep(0, k)
          for(i in c(1:n)){
            tempwi2 <- as.vector(D[i, ] %*% Pi_est_AngleMinPlus)
            if(sum(tempwi2^2) ==0){
              err_AngleMinPlus[i] <- rbinom(n = 1, size = 1, prob = 1 - 1 / k)
            }
            else{
              for(l in c(1:k)){
                temp[l] <- cor2(tempwi2, V_AngleMinPlus[l, ])
              }
              err_AngleMinPlus[i] <- 1 - Pi[i, which.max(temp)]
            }
          }
          res_AngleMinPlus[n_L_temp, iter] <- sum(err_AngleMinPlus[(n_L + 1):n]) / n_U
          
  
          #SNMF
          O_L <- matrix(rep(0, n * n), nrow = n)
          O_L[1:n_L, 1:n_L] = O[1:n_L, 1:n_L]
          D_L <- diag(rowSums(O_L))
          
          H <- matrix(rpareto(n = n * k, a = 10), nrow = n)
          H <- diag(rowSums(H)^{-1}) %*% H
          lambda <- 1 
          
          for(ii in c(1:200)){
            H <- H * (D %*% H + 2 * lambda * O_L %*% H) / (H %*% t(H) %*% H + lambda * D_L %*% H)
          }
          
          H <- diag(rowSums(H)^{-1}) %*% H
          center_comu_h <- rep(0, nrow(permk))
          for(i in c(1:nrow(permk))){
            for(j in c((n_L+1):n)){
              center_comu_h[i] <- center_comu_h[i] + 1 - (permk[i, which.max(H[j, ])] == which.max(Pi[j, ]))
            }
          }
          
          res_SNMF[n_L_temp, iter] <- min(center_comu_h) / n_U
        }
      }
      
      
      #save the data
      file_path_save <- paste('Simulation_', di, '_', ma, '_', ba, '.Rdata', sep = '')
      save.image(file_path_save)            
    }
  }
}

