# Need the following packages
#install.packages("CVXR")
#install.packages("MASS")
#install.packages("huge")
#install.packages("matrixcalc")
#install.packages("ggplot2")
#install.packages("cowplot")

library(CVXR)
library(MASS)
library(huge)
library(matrixcalc)
library(ggplot2)
library(cowplot)


##############################################################################
######################  4 classes ######################
# ---- Settings for the 4-class experiment ----
p <- 1000                                   # feature dimension
nvec <- c(10, 20, 30, 40, 55, 70, 85, 100)  # sample sizes swept by the outer loop
nlen <- length(nvec)
numcla <- 4                                 # number of classes
# Reference scores for the label-interpolation check below: a one-hot entry
# shifted by -1/numcla, i.e. (numcla - 1)/numcla for the true class and
# -1/numcla for every other class.
labpos <- (numcla - 1) / numcla
labneg <- (-1) / numcla

# The class weight vectors are eigenvectors of a covariance matrix.
# huge.generator() is only used to create that covariance matrix; the seed is
# fixed so the same matrix is produced on every run.
set.seed(22222)
dat00 <- huge.generator(n = 1000, d = p, graph = "random", v = NULL, u = NULL,
                        g = NULL, prob = NULL, vis = FALSE, verbose = TRUE)
datmat00 <- dat00$sigma
eig00 <- eigen(datmat00)
mumator <- eig00$vectors[, seq_len(numcla)]  # first numcla eigenvectors, one per class
mumat <- (p^(0.5)) * mumator                 # eigenvectors rescaled by sqrt(p)
mumat01 <- mumat                             # copy kept under a section-specific name


# Covariance of the features (diagonal here: the identity matrix)
lbdvec <- rep(1, p)  # diagonal elements
covmat <- diag(lbdvec)

numsim <- 100  # number of simulations per sample size

# Result matrices: one row per simulation, one column per entry of nvec.
#  - numnoninterpmat: number of margin constraints that do NOT hold with
#    equality, so the number of equalities is n * (numcla - 1) minus this count.
#  - numlabinterpmat: number of (sample, other class) pairs whose fitted scores
#    equal labpos (true class) and labneg (other class) up to the tolerance.
numnoninterpmat <- matrix(0, numsim, nlen)
numlabinterpmat <- matrix(0, numsim, nlen)

eqtol <- 0.00001  # tolerance used when testing the solver output for equality


time01 <- Sys.time()
for (m in seq_len(nlen)) {
  n <- nvec[m]
  for (k in seq_len(numsim)) {
    set.seed(9 * k)

    # Features: one column per sample, drawn from N(0, covmat)
    xmat <- t(mvrnorm(n, rep(0, p), covmat))

    # Labels: class j is drawn with probability proportional to exp(x_i' mu_j)
    labclass <- rep(0, n)
    for (i in seq_len(n)) {
      expprobv <- rep(0.01, numcla)
      for (j in seq_len(numcla)) {
        expprobv[j] <- exp(t(xmat[, i]) %*% mumat[, j])
      }
      probvec <- expprobv / sum(expprobv)
      labclass[i] <- sample(seq_len(numcla), size = 1, replace = TRUE, prob = probvec)
    }

    # Optimization: minimise the squared Frobenius norm of the p x numcla
    # weight matrix subject to x_i' (w_{y_i} - w_j) >= 1 for every j != y_i.
    vmat <- Variable((p), numcla)
    obj <- Minimize(cvxr_norm(vmat, "fro")^2)
    # Only the n * (numcla - 1) real constraints are stored, so the list has no
    # empty slots and no placeholder constraint is needed for j == y_i.
    constraint <- vector(mode = "list", length = n * (numcla - 1))
    count01 <- 0
    for (i in seq_len(n)) {
      xrow <- t(xmat[, i])
      truecla <- labclass[i]
      for (j in setdiff(seq_len(numcla), truecla)) {
        count01 <- count01 + 1
        constraint[[count01]] <- xrow %*% (vmat[, truecla] - vmat[, j]) >= 1
      }
    }
    probl <- Problem(obj, constraint)
    result <- solve(probl)
    # Without a solution getValue() yields NA and the counts below would be
    # meaningless, so fail here with an explicit message instead.
    if (!(result$status %in% c("optimal", "optimal_inaccurate"))) {
      stop("CVXR returned status '", result$status, "' for n = ", n,
           ", simulation k = ", k, call. = FALSE)
    }
    mumat_est <- result$getValue(vmat)  # fitted p x numcla weight matrix

    # Scores x_i' w_j for every sample (rows) and class (columns)
    scoremat <- t(xmat) %*% mumat_est
    trueidx <- cbind(seq_len(n), labclass)  # (row, column) of each true class
    truescore <- scoremat[trueidx]
    iswrong <- matrix(TRUE, n, numcla)      # TRUE on the (sample, other class) pairs
    iswrong[trueidx] <- FALSE

    # Margin constraints that are not tight: |x_i'(w_y - w_j) - 1| >= eqtol
    numnoninterp <- sum(iswrong & abs(truescore - scoremat - 1) >= eqtol)
    # Pairs whose true-class score is labpos and other-class score is labneg
    numlabinterp <- sum(iswrong & abs(truescore - labpos) < eqtol &
                          abs(scoremat - labneg) < eqtol)

    numnoninterpmat[k, m] <- numnoninterp
    numlabinterpmat[k, m] <- numlabinterp
  }
}

time02 <- Sys.time()
time02 - time01  # elapsed time for this experiment




#######################################################################################
#####################  3 classes ########################
# ---- Settings for the 3-class experiment ----
p <- 1000                                   # feature dimension
nvec <- c(10, 20, 30, 40, 55, 70, 85, 100)  # sample sizes swept by the outer loop
nlen <- length(nvec)
numcla <- 3                                 # number of classes
# Reference scores for the label-interpolation check below: a one-hot entry
# shifted by -1/numcla, i.e. (numcla - 1)/numcla for the true class and
# -1/numcla for every other class.
labpos <- (numcla - 1) / numcla
labneg <- (-1) / numcla

# The class weight vectors are eigenvectors of a covariance matrix.
# huge.generator() is only used to create that covariance matrix; the seed is
# fixed so the same matrix is produced on every run.
set.seed(22222)
dat00 <- huge.generator(n = 1000, d = p, graph = "random", v = NULL, u = NULL,
                        g = NULL, prob = NULL, vis = FALSE, verbose = TRUE)
datmat00 <- dat00$sigma
eig00 <- eigen(datmat00)
mumator <- eig00$vectors[, seq_len(numcla)]  # first numcla eigenvectors, one per class
mumat <- (p^(0.5)) * mumator                 # eigenvectors rescaled by sqrt(p)
mumat02 <- mumat                             # copy kept under a section-specific name

# Covariance of the features (diagonal here: the identity matrix)
lbdvec <- rep(1, p)  # diagonal elements
covmat <- diag(lbdvec)

numsim <- 100  # number of simulations per sample size

# Result matrices: one row per simulation, one column per entry of nvec.
#  - numnoninterpmat02: number of margin constraints that do NOT hold with
#    equality, so the number of equalities is n * (numcla - 1) minus this count.
#  - numlabinterpmat02: number of (sample, other class) pairs whose fitted
#    scores equal labpos (true class) and labneg (other class) up to the tolerance.
numnoninterpmat02 <- matrix(0, numsim, nlen)
numlabinterpmat02 <- matrix(0, numsim, nlen)

eqtol <- 0.00001  # tolerance used when testing the solver output for equality


time01 <- Sys.time()
for (m in seq_len(nlen)) {
  n <- nvec[m]
  for (k in seq_len(numsim)) {
    set.seed(9 * k)

    # Features: one column per sample, drawn from N(0, covmat)
    xmat <- t(mvrnorm(n, rep(0, p), covmat))

    # Labels: class j is drawn with probability proportional to exp(x_i' mu_j)
    labclass <- rep(0, n)
    for (i in seq_len(n)) {
      expprobv <- rep(0.01, numcla)
      for (j in seq_len(numcla)) {
        expprobv[j] <- exp(t(xmat[, i]) %*% mumat[, j])
      }
      probvec <- expprobv / sum(expprobv)
      labclass[i] <- sample(seq_len(numcla), size = 1, replace = TRUE, prob = probvec)
    }

    # Optimization: minimise the squared Frobenius norm of the p x numcla
    # weight matrix subject to x_i' (w_{y_i} - w_j) >= 1 for every j != y_i.
    vmat <- Variable((p), numcla)
    obj <- Minimize(cvxr_norm(vmat, "fro")^2)
    # Only the n * (numcla - 1) real constraints are stored, so the list has no
    # empty slots and no placeholder constraint is needed for j == y_i.
    constraint <- vector(mode = "list", length = n * (numcla - 1))
    count01 <- 0
    for (i in seq_len(n)) {
      xrow <- t(xmat[, i])
      truecla <- labclass[i]
      for (j in setdiff(seq_len(numcla), truecla)) {
        count01 <- count01 + 1
        constraint[[count01]] <- xrow %*% (vmat[, truecla] - vmat[, j]) >= 1
      }
    }
    probl <- Problem(obj, constraint)
    result <- solve(probl)
    # Without a solution getValue() yields NA and the counts below would be
    # meaningless, so fail here with an explicit message instead.
    if (!(result$status %in% c("optimal", "optimal_inaccurate"))) {
      stop("CVXR returned status '", result$status, "' for n = ", n,
           ", simulation k = ", k, call. = FALSE)
    }
    mumat_est <- result$getValue(vmat)  # fitted p x numcla weight matrix

    # Scores x_i' w_j for every sample (rows) and class (columns)
    scoremat <- t(xmat) %*% mumat_est
    trueidx <- cbind(seq_len(n), labclass)  # (row, column) of each true class
    truescore <- scoremat[trueidx]
    iswrong <- matrix(TRUE, n, numcla)      # TRUE on the (sample, other class) pairs
    iswrong[trueidx] <- FALSE

    # Margin constraints that are not tight: |x_i'(w_y - w_j) - 1| >= eqtol
    numnoninterp <- sum(iswrong & abs(truescore - scoremat - 1) >= eqtol)
    # Pairs whose true-class score is labpos and other-class score is labneg
    numlabinterp <- sum(iswrong & abs(truescore - labpos) < eqtol &
                          abs(scoremat - labneg) < eqtol)

    numnoninterpmat02[k, m] <- numnoninterp
    numlabinterpmat02[k, m] <- numlabinterp
  }
}

time02 <- Sys.time()
time02 - time01  # elapsed time for this experiment





#########################################################################################
###################### 6 classes ########################
# ---- Settings for the 6-class experiment ----
p <- 1000                                   # feature dimension
nvec <- c(10, 20, 30, 40, 55, 70, 85, 100)  # sample sizes swept by the outer loop
#nvec<-c(30)
nlen <- length(nvec)
numcla <- 6                                 # number of classes
# Reference scores for the label-interpolation check below: a one-hot entry
# shifted by -1/numcla, i.e. (numcla - 1)/numcla for the true class and
# -1/numcla for every other class.
labpos <- (numcla - 1) / numcla
labneg <- (-1) / numcla

# The class weight vectors are eigenvectors of a covariance matrix.
# huge.generator() is only used to create that covariance matrix; the seed is
# fixed so the same matrix is produced on every run.
set.seed(22222)
dat00 <- huge.generator(n = 1000, d = p, graph = "random", v = NULL, u = NULL,
                        g = NULL, prob = NULL, vis = FALSE, verbose = TRUE)
datmat00 <- dat00$sigma
eig00 <- eigen(datmat00)
mumator <- eig00$vectors[, seq_len(numcla)]  # first numcla eigenvectors, one per class
mumat <- (p^(0.5)) * mumator                 # eigenvectors rescaled by sqrt(p)
mumat03 <- mumat                             # copy kept under a section-specific name

#probvec<-rep(1/(numcla),numcla)  # To set the probabilities for each class

# Covariance of the features (diagonal here: the identity matrix)
lbdvec <- rep(1, p)  # diagonal elements
covmat <- diag(lbdvec)

numsim <- 100  # number of simulations per sample size

# Result matrices: one row per simulation, one column per entry of nvec.
#  - numnoninterpmat03: number of margin constraints that do NOT hold with
#    equality, so the number of equalities is n * (numcla - 1) minus this count.
#  - numlabinterpmat03: number of (sample, other class) pairs whose fitted
#    scores equal labpos (true class) and labneg (other class) up to the tolerance.
numnoninterpmat03 <- matrix(0, numsim, nlen)
numlabinterpmat03 <- matrix(0, numsim, nlen)

eqtol <- 0.00001  # tolerance used when testing the solver output for equality


time01 <- Sys.time()
for (m in seq_len(nlen)) {
  n <- nvec[m]
  for (k in seq_len(numsim)) {
    set.seed(9 * k)

    # Features: one column per sample, drawn from N(0, covmat)
    xmat <- t(mvrnorm(n, rep(0, p), covmat))

    # Labels: class j is drawn with probability proportional to exp(x_i' mu_j)
    labclass <- rep(0, n)
    for (i in seq_len(n)) {
      expprobv <- rep(0.01, numcla)
      for (j in seq_len(numcla)) {
        expprobv[j] <- exp(t(xmat[, i]) %*% mumat[, j])
      }
      probvec <- expprobv / sum(expprobv)
      labclass[i] <- sample(seq_len(numcla), size = 1, replace = TRUE, prob = probvec)
    }

    # Optimization: minimise the squared Frobenius norm of the p x numcla
    # weight matrix subject to x_i' (w_{y_i} - w_j) >= 1 for every j != y_i.
    vmat <- Variable((p), numcla)
    obj <- Minimize(cvxr_norm(vmat, "fro")^2)
    # Only the n * (numcla - 1) real constraints are stored, so the list has no
    # empty slots and no placeholder constraint is needed for j == y_i.
    constraint <- vector(mode = "list", length = n * (numcla - 1))
    count01 <- 0
    for (i in seq_len(n)) {
      xrow <- t(xmat[, i])
      truecla <- labclass[i]
      for (j in setdiff(seq_len(numcla), truecla)) {
        count01 <- count01 + 1
        constraint[[count01]] <- xrow %*% (vmat[, truecla] - vmat[, j]) >= 1
      }
    }
    probl <- Problem(obj, constraint)
    result <- solve(probl)
    # Without a solution getValue() yields NA and the counts below would be
    # meaningless, so fail here with an explicit message instead.
    if (!(result$status %in% c("optimal", "optimal_inaccurate"))) {
      stop("CVXR returned status '", result$status, "' for n = ", n,
           ", simulation k = ", k, call. = FALSE)
    }
    mumat_est <- result$getValue(vmat)  # fitted p x numcla weight matrix

    # Scores x_i' w_j for every sample (rows) and class (columns)
    scoremat <- t(xmat) %*% mumat_est
    trueidx <- cbind(seq_len(n), labclass)  # (row, column) of each true class
    truescore <- scoremat[trueidx]
    iswrong <- matrix(TRUE, n, numcla)      # TRUE on the (sample, other class) pairs
    iswrong[trueidx] <- FALSE

    # Margin constraints that are not tight: |x_i'(w_y - w_j) - 1| >= eqtol
    numnoninterp <- sum(iswrong & abs(truescore - scoremat - 1) >= eqtol)
    # Pairs whose true-class score is labpos and other-class score is labneg
    numlabinterp <- sum(iswrong & abs(truescore - labpos) < eqtol &
                          abs(scoremat - labneg) < eqtol)

    numnoninterpmat03[k, m] <- numnoninterp
    numlabinterpmat03[k, m] <- numlabinterp
  }
}

time02 <- Sys.time()
time02 - time01  # elapsed time for this experiment




#########################################################################################
###################### 5 classes ########################
# ---- Settings for the 5-class experiment ----
p <- 1000                                   # feature dimension
nvec <- c(10, 20, 30, 40, 55, 70, 85, 100)  # sample sizes swept by the outer loop
#nvec<-c(30)
nlen <- length(nvec)
numcla <- 5                                 # number of classes
# Reference scores for the label-interpolation check below: a one-hot entry
# shifted by -1/numcla, i.e. (numcla - 1)/numcla for the true class and
# -1/numcla for every other class.
labpos <- (numcla - 1) / numcla
labneg <- (-1) / numcla

# The class weight vectors are eigenvectors of a covariance matrix.
# huge.generator() is only used to create that covariance matrix; the seed is
# fixed so the same matrix is produced on every run.
set.seed(22222)
dat00 <- huge.generator(n = 1000, d = p, graph = "random", v = NULL, u = NULL,
                        g = NULL, prob = NULL, vis = FALSE, verbose = TRUE)
datmat00 <- dat00$sigma
eig00 <- eigen(datmat00)
mumator <- eig00$vectors[, seq_len(numcla)]  # first numcla eigenvectors, one per class
mumat <- (p^(0.5)) * mumator                 # eigenvectors rescaled by sqrt(p)
mumat04 <- mumat                             # copy kept under a section-specific name
#mumat[,2]<-10*mumat[,1]

#probvec<-rep(1/(numcla),numcla)  # To set the probabilities for each class

# Covariance of the features (diagonal here: the identity matrix)
lbdvec <- rep(1, p)  # diagonal elements
covmat <- diag(lbdvec)

numsim <- 100  # number of simulations per sample size

# Result matrices: one row per simulation, one column per entry of nvec.
#  - numnoninterpmat04: number of margin constraints that do NOT hold with
#    equality, so the number of equalities is n * (numcla - 1) minus this count.
#  - numlabinterpmat04: number of (sample, other class) pairs whose fitted
#    scores equal labpos (true class) and labneg (other class) up to the tolerance.
numnoninterpmat04 <- matrix(0, numsim, nlen)
numlabinterpmat04 <- matrix(0, numsim, nlen)

eqtol <- 0.00001  # tolerance used when testing the solver output for equality


time01 <- Sys.time()
for (m in seq_len(nlen)) {
  n <- nvec[m]
  for (k in seq_len(numsim)) {
    set.seed(9 * k)

    # Features: one column per sample, drawn from N(0, covmat)
    xmat <- t(mvrnorm(n, rep(0, p), covmat))

    # Labels: class j is drawn with probability proportional to exp(x_i' mu_j)
    labclass <- rep(0, n)
    for (i in seq_len(n)) {
      expprobv <- rep(0.01, numcla)
      for (j in seq_len(numcla)) {
        expprobv[j] <- exp(t(xmat[, i]) %*% mumat[, j])
      }
      probvec <- expprobv / sum(expprobv)
      labclass[i] <- sample(seq_len(numcla), size = 1, replace = TRUE, prob = probvec)
    }

    # Optimization: minimise the squared Frobenius norm of the p x numcla
    # weight matrix subject to x_i' (w_{y_i} - w_j) >= 1 for every j != y_i.
    vmat <- Variable((p), numcla)
    obj <- Minimize(cvxr_norm(vmat, "fro")^2)
    # Only the n * (numcla - 1) real constraints are stored, so the list has no
    # empty slots and no placeholder constraint is needed for j == y_i.
    constraint <- vector(mode = "list", length = n * (numcla - 1))
    count01 <- 0
    for (i in seq_len(n)) {
      xrow <- t(xmat[, i])
      truecla <- labclass[i]
      for (j in setdiff(seq_len(numcla), truecla)) {
        count01 <- count01 + 1
        constraint[[count01]] <- xrow %*% (vmat[, truecla] - vmat[, j]) >= 1
      }
    }
    probl <- Problem(obj, constraint)
    result <- solve(probl)
    # Without a solution getValue() yields NA and the counts below would be
    # meaningless, so fail here with an explicit message instead.
    if (!(result$status %in% c("optimal", "optimal_inaccurate"))) {
      stop("CVXR returned status '", result$status, "' for n = ", n,
           ", simulation k = ", k, call. = FALSE)
    }
    mumat_est <- result$getValue(vmat)  # fitted p x numcla weight matrix

    # Scores x_i' w_j for every sample (rows) and class (columns)
    scoremat <- t(xmat) %*% mumat_est
    trueidx <- cbind(seq_len(n), labclass)  # (row, column) of each true class
    truescore <- scoremat[trueidx]
    iswrong <- matrix(TRUE, n, numcla)      # TRUE on the (sample, other class) pairs
    iswrong[trueidx] <- FALSE

    # Margin constraints that are not tight: |x_i'(w_y - w_j) - 1| >= eqtol
    numnoninterp <- sum(iswrong & abs(truescore - scoremat - 1) >= eqtol)
    # Pairs whose true-class score is labpos and other-class score is labneg
    numlabinterp <- sum(iswrong & abs(truescore - labpos) < eqtol &
                          abs(scoremat - labneg) < eqtol)

    numnoninterpmat04[k, m] <- numnoninterp
    numlabinterpmat04[k, m] <- numlabinterp
  }
}

time02 <- Sys.time()
time02 - time01  # elapsed time for this experiment



#remove(probl)
#remove(constraint)
#remove(obj)
#rm(result)
#remove(dat00)
#remove(datmat00)
#remove(eig00)


##############################################################################################
##############################################################################################
##########################  Figures ###########################
######### k = 4,3,6,5 ###########
# Class counts of the four experiments, in the order their result matrices are
# stacked below (numnoninterpmat, ...02, ...03, ...04).
numclavec <- c(4, 3, 6, 5)

# Mean counts over the simulations: one row per experiment, one column per
# entry of nvec.
numnoninterpsum <- rbind(colMeans(numnoninterpmat),
                         colMeans(numnoninterpmat02),
                         colMeans(numnoninterpmat03),
                         colMeans(numnoninterpmat04))

numnoninterpsum

numlabinterpsum <- rbind(colMeans(numlabinterpmat),
                         colMeans(numlabinterpmat02),
                         colMeans(numlabinterpmat03),
                         colMeans(numlabinterpmat04))

numlabinterpsum

# Convert the mean counts to fractions of the n * (k - 1) margin constraints.
# numconstr[i, j] = (numclavec[i] - 1) * nvec[j].
numconstr <- outer(numclavec - 1, nvec)
numnoninterpprop <- numnoninterpsum / numconstr
numlabinterpprop <- numlabinterpsum / numconstr

numnoninterpprop
numlabinterpprop


# Rescaled x-axis values: for each experiment (in numclavec order) compute
# kweight * n * log(sqrt(k) * n) / p over all n in nvec and concatenate.
# NOTE(review): the axis titles below display sqrt(log(.)) while the values
# computed here use log(.) without the square root -- confirm which is intended.
rescale_n <- function(kweight) {
  unlist(lapply(seq_along(numclavec), function(i) {
    kweight[i] * (nvec) * log(sqrt(numclavec[i]) * nvec) / p
  }))
}
rescnvec02 <- rescale_n(numclavec^2)        # k^2 scaling
rescnvec03 <- rescale_n(numclavec)          # k scaling
rescnvec04 <- rescale_n((numclavec - 1)^2)  # (k - 1)^2 scaling; not plotted

# Long-format ingredients shared by the three panels. Labels and values are
# derived from numclavec and nlen so they stay aligned if nvec changes.
lab00 <- rep(as.character(numclavec), each = nlen)
interpvec <- as.vector(t(numlabinterpprop))  # row 1, then row 2, ...

# Build one panel: fraction (y) against the supplied x values, one coloured
# line with points per class count. xlab_expr = NULL keeps the default x title.
make_interp_plot <- function(dat, xlim, xlab_expr = NULL) {
  g <- ggplot(dat, aes(x = n, y = interp, group = ratio)) +
    ylab("Fraction of support vectors") +
    geom_line(aes(color = ratio), size = 1.5) +
    coord_cartesian(xlim = xlim, ylim = c(0.5, 1)) +
    geom_point(aes(color = ratio), size = 3) +
    theme_minimal() +
    theme(axis.text = element_text(size = 12),
          axis.title = element_text(size = 21),
          legend.text = element_text(size = 17),
          legend.title = element_text(size = 21))
  if (!is.null(xlab_expr)) {
    g <- g + xlab(xlab_expr)
  }
  g
}

# Panel 1: raw sample size on the x-axis; its legend (titled "k") is reused
# for the whole figure.
data_int01 <- data.frame(interp = interpvec, n = rep(nvec, times = length(numclavec)),
                         ratio = lab00)
legendtitle <- expression(paste("k"))
plot_int01 <- make_interp_plot(data_int01, xlim = c(10, 100)) +
  labs(color = legendtitle)

# Panel 2: x-axis rescaled with the k^2 factor.
data_int02 <- data.frame(interp = interpvec, n = rescnvec02, ratio = lab00)
plot_int02 <- make_interp_plot(
  data_int02, xlim = c(0.05, 20),
  xlab_expr = expression(paste("("~k^2~n~sqrt("log("~sqrt(k)~"n)")~")/"~"(p)"))
)

# Panel 3: x-axis rescaled with the k factor.
data_int03 <- data.frame(interp = interpvec, n = rescnvec03, ratio = lab00)
plot_int03 <- make_interp_plot(
  data_int03, xlim = c(0.05, 3.2),
  xlab_expr = expression(paste("("~k~n~sqrt("log("~sqrt(k)~"n)")~")/"~"(p)"))
)


# Extract the legend once, then lay the three panels out side by side without
# their own legends and put the shared legend to the right.
legend_01 <- get_legend(plot_int01)

p_grid <- plot_grid(
  plot_int01 + theme(legend.position = "none"),
  plot_int02 + theme(legend.position = "none"),
  plot_int03 + theme(legend.position = "none"),
  nrow = 1)

plotg <- plot_grid(p_grid, legend_01, rel_widths = c(3, .4))

plotg





