
library(parallel)
library(MASS)
library(fda)
library(e1071)
library(ggplot2)

#### function ----
dgp<-function(n,sclass=0,h=0,alpha=0.1,xnb=50,int_nt=1000){
  
  for(ci in 1:5){
    zeta_phi<-matrix(unlist(lapply(1:int_nt,function(i) #xnb: num of basis of x
      c(1,sqrt(2)*cos((1:(xnb-1))*pi*i/int_nt)))),ncol=int_nt) #dim: xnb*int_nt
    x<-matrix(runif(n*xnb,-sqrt(3),sqrt(3)),ncol=xnb)%*%(zeta_phi * ((-1)^((1:xnb)+1)*(1:xnb)^(-1)) ) #dim: n*xnb %*% xnb*int_nt
    
    if(sclass==0){
      beta<-as.vector( (4*(-1)^((1:xnb)+1)*(1:xnb)^(-2)) %*% zeta_phi)
    }else if(sclass==1){
      beta<-as.vector( ( (4*(-1)^((1:xnb)+1) + runif(xnb,-1,1)*h/pi ) * (1:xnb)^(-2)) %*%zeta_phi)
    }else if(sclass==3){
      beta<-as.vector( ( (4*(-1)^((1:xnb)+1) - runif(xnb,1,2)*h/pi ) * (1:xnb)^(-2)) %*%zeta_phi)
    }else{
      rho<-exp(-15/int_nt);rho1<-sqrt(1-rho^2)
      z<-rnorm(1+int_nt,0,1);beta<-c(z[1],rep(0,int_nt))
      for(i in 1:int_nt){beta[i+1]<-rho*beta[i]+rho1*z[i+1]}
      beta<-beta[-1]
    }
    fx<-as.vector(alpha+x%*%beta/int_nt)
    y<-rbinom(n,1,exp(fx)/(1+exp(fx)));y[which(y==0)]<--1
    xobs<-x[,as.integer((1:nti)*(int_nt/nti))]
    
    if(sd(y)!=0){break}
  }
  return(list('y'=y,'x'=xobs))
}
dpfun<-function(d0,q=0.9,sd=0.01,pri=F,nol=0){
  n<-length(d0$y)
  if(pri){q0<-min(1-q,q-0.5);q<-runif(n,q-q0,q+q0)}
  y1<-d0$y*(rbinom(n,1,q)*2-1)
  
  beta<-mat%*%t(d0$x)
  if(nol && nbasis!=1){
    if(nol==1){beta<-apply(beta,MARGIN=2,function(x) x/max(abs(x)))}else{
      beta<-tanh(beta)}}
  if(nbasis==1){beta<-matrix(beta,nrow=1);if(nol){beta<-tanh(beta)}}
  beta<-beta+matrix(rnorm(length(beta),0,sd),nrow=nrow(beta))
  x1<-t(basismat%*%beta)
  
  return(list('y'=y1,'x'=x1,'beta'=t(beta)))
}
dwdfun<-function(d0,dtest,qi=0.1,lami=0.01,addcoef=NA,maxi=100,mini=1e-5,lammin=1e-5){
  #maxi:maximum #iteration; mini:threshold of stop iteration; lammin:minmum eigenvalue selected
  x<-d0$x;y<-d0$y
  nti<-ncol(x);N<-2;
  if(is.na(addcoef[[1]])){add0<-0}else{add0<-as.vector(x%*%addcoef$beta/nti)}
  psi1<-rep(1,nti);psi2<-(0:nti)/nti-0.5
  k2<-((psi2^2-1/12)/2)[-1];k4<-((psi2^4-psi2^2/2+7/240)/24)[-(nti+1)]
  k4st<-matrix(NA,ncol=nti,nrow=nti)
  for(i in 1:nti){k4st[i,i:nti]<-k4st[i:nti,i]<-k4[1:(nti-i+1)]}
  kst<-k2%*%t(k2)-k4st;rm(i,k2,k4,k4st) #nti*nti
  R0<-x%*%kst%*%t(x)/nti^2 #n*n, inner_prod
  S0<-x%*%cbind(psi1,psi2[-1])/nti #n*N, inner_prod
  
  ## for given q and lambda, qi and lami
  R<-R0;S<-S0;n<-nrow(x);theta<-rep(0,1+N+n)
  R_svd<-svd(R);k0<-sum(R_svd$d>lammin)
  B<-cbind(c(n,colSums(S)),rbind(colSums(S),t(S)%*%S))
  C<-cbind(colSums(R),R%*%S)
  Binv<-ginv(B)
  CBinv<-C%*%Binv
  if(is.na(addcoef[[1]])){addi=0}else{addi<-as.vector(addcoef$alpha+add0)}
  
  Pi<-(R_svd$d[1:k0])^2+(R_svd$d[1:k0])*2*n*qi*lami/(qi+1)^2
  Dinv<-R_svd$u[,1:k0]%*%diag(1/Pi,nrow=length(Pi),ncol=length(Pi))%*%t(R_svd$v[,1:k0])
  DinvC<-Dinv%*%C
  P<-Dinv+DinvC%*%ginv(B-t(C)%*%DinvC)%*%t(DinvC);PCBinv<-P%*%CBinv
  Ainv<-cbind(rbind(Binv+t(CBinv)%*%PCBinv,-PCBinv),rbind(-t(PCBinv),P))
  # iteration of theta
  for(i in 1:maxi){
    u<-as.vector(y*(cbind(rep(1,n),S,R)%*%theta+addi));u0<-which(u>qi/(1+qi))
    d1loss<-rep(-1,length(u))
    if(length(u0)>0){d1loss[u0]<- -(qi/u[u0]/(qi+1))^(qi+1)}
    r<-y*d1loss/n;rm(u,u0,d1loss)
    d1theta<-c(sum(r),t(S)%*%r,R%*%r+2*lami*R%*%theta[-(1:(1+N))])
    theta1<-as.vector(theta-n*qi/(qi+1)^2*Ainv%*%d1theta)
    if(mean((theta1-theta)^2)<=mini){break}else{theta<-theta1}
  }
  # the misclassification probability
  hat_alpha<-theta1[1]
  hat_beta<-as.vector(cbind(psi1,psi2[-1])%*%theta1[2:(1+N)]+kst%*%t(x)%*%theta1[-(1:(1+N))])
  
  test<-mean(((as.vector(hat_alpha+dtest$x%*%hat_beta/nti)>0)*2-1)!=dtest$y)
  return(list('beta'=c(hat_alpha,hat_beta),'acc'=test))
}
cgfun<-function(d0,dtest,di=5){
  x<-t(d0$x[which(d0$y==1),]);y<-t(d0$x[which(d0$y==-1),])
  n1<-ncol(x);n2<-ncol(y)
  mu1<-rowMeans(x);mu2<-rowMeans(y);mu<-mu1-mu2;mu12<-(n1*mu1+n2*mu2)/(n1+n2)
  hatk<-((x-mu1)%*%t(x-mu1)+(y-mu2)%*%t(y-mu2))/(n1+n2-2)
  rec<-list()
  phi<-0;v<-z<-mu
  for(i in 1:di){
    h<-as.numeric(sum(v*z)/t(v)%*%hatk%*%v)
    phi<-phi+h*v
    z<-mu-as.vector(hatk%*%phi)
    rec[[i]]<-phi
    g<-as.numeric((t(z)%*%hatk%*%v)/(t(v)%*%hatk%*%v))
    v<-z-g*v
  }
  recg<-matrix(unlist(rec),ncol=di)
  ypre<-apply((dtest$x%*%recg>0)*2-1,MARGIN=2,function(ydi)mean(dtest$y!=ydi))
  return(list('beta'=rbind(rep(0,di),recg),'acc'=ypre))
}
vecfun <- function(d0, dtest){
  logistic_model <- glm(y ~ ., data = data.frame(x = d0$x, y = d0$y), family = "binomial")
  beta_logistic <- coef(logistic_model)
  svm_model <- svm(x = d0$x, y = d0$y, kernel = "linear", probability = TRUE)
  beta_svm <- coef(svm_model)
  beta <- cbind(c(beta_logistic[1],basismat%*%beta_logistic[-1]),
               c(beta_svm[1],basismat%*%beta_svm[-1]))
  acc<-unlist(lapply(1:ncol(beta),function(i)
    mean(((as.vector(beta[1,i]+dtest$x%*%beta[-1,i]/nti)>0)*2-1)!=dtest$y)))
  return(list('beta' = beta, 'acc' = acc))
}

#### sim ----
## parameters
nb<-1:20;dy<-1;dx<-0
nti<-50;ti<-(1:nti)/nti

myres<-list()
for(j in 1:length(nb)){
  nbasis<-nb[j]
  order<-min(nbasis,4)
  knots<-seq(0,1,1/(nbasis-order+1))
  basis<-create.bspline.basis(c(0,1),nbasis,order,knots)
  basismat<-eval.basis(ti,basis)
  mat<-ginv(t(basismat)%*%basismat)%*%t(basismat)#p*p
  rm(order,knots,basis)
  
  set.seed(7777)
  iseed<-sample(1000:10000,500,replace=F)
  cl<-makeCluster(25,type="FORK")
  myres[[j]]<-parLapply(cl,1:500,function(i){
    tryCatch({
      set.seed(iseed[i])
      d0<-dgp(50);dtest<-dgp(500)
      d11<-lapply(0:2,function(mi) dpfun(d0,q=dy,sd=dx,nol=mi))
      
      dwd0<-dwdfun(d0,dtest)
      cg0<-cgfun(d0,dtest)
      beta0<-cbind(dwd0$beta,cg0$beta)
      acc0<-c(dwd0$acc,cg0$acc)
      colnames(beta0)<-names(acc0)<-c('dwd',paste0('cg',1:5))
      
      resd11<-lapply(d11,function(d1){
        res1<-vecfun(list('y'=(d1$y+1)/2,'x'=d1$beta),dtest) #logistic, svm
        res2<-dwdfun(d1,dtest);res3<-cgfun(d1,dtest)
        beta1<-cbind(res1$beta,res2$beta,res3$beta)
        acc1<-c(res1$acc,res2$acc,res3$acc)
        return(list('beta'=beta1,'acc'=acc1))
      })
      beta1<-array(NA,dim=c(51,8,length(resd11)),
                   dimnames=list(NULL,c('logi','svm','dwd',paste0('cg',1:5)),
                                                 c('coef','nol1','nol2')))
      for(di in 1:length(resd11)){beta1[,,di]<-resd11[[di]]$beta}
      acc1<-do.call(rbind,lapply(resd11,function(di) di$acc))
      colnames(acc1)<-c('logi','svm','dwd',paste0('cg',1:5));rownames(acc1)<-c('coef','nol1','nol2')
      
      return(list('beta0'=beta0,'acc0'=acc0,'beta1'=beta1,'acc1'=acc1))
    },error=function(e){cat("ERROR : i = ",i,", ",conditionMessage(e), "\n")})
  })
  print(j)
  save.image("encoding.Rdata")
}
save.image("encoding.Rdata")

