
#### A.6 Visualization of functional observations ----
library(fda)
library(abind)
library(tidyr)
library(dplyr)
library(ggplot2)
library(scales)
library(gridExtra)

## with functions defined in single_server.R
nbasis<-4
basis<-create.fourier.basis(c(0,1),nbasis)
basismat<-eval.basis(ti,basis)[,-1]
mat<-ginv(t(basismat)%*%basismat)%*%t(basismat)#p*p
rm(order,knots,basis)
cutoff<-seq(0,0.5,length.out=10+1)[-1]

## the dataset can be downloaded from https://hastie.su.domains/ElemStatLearn/
da<-read.table('phoneme.data',header=T,sep=',')
yg<-da$g;speaker<-da$speaker;da<-as.matrix(da[,-c(1,258:259)])
id<-which(yg%in%c('sh','iy'));da<-da[id,];speaker<-speaker[id];yg<-yg[id];rm(id);da<-(da-13)
y<-rep(-1,length(yg));y[which(yg==yg[1])]<-1
nti<-256;ti<-(1:nti)/nti

## plot
inx<-c(which(y==1)[1:200],which(y==-1)[1:200])
d0<-list('y'=y[inx],'x'=da[inx,])
d11<-lapply(0:2,function(mi) dpfun(d0,q=1,dx=0,nol=mi))
pic1<-function(dd,rg=10){
  x<-dd$x;y<-dd$y;n <- nrow(x);p <- nti
  long_data <- data.frame(matrix = rep(1:n, each = p),
                          time = rep(ti, n),
                          value = c(as.matrix(t(x))))
  long_data$color <- rep(ifelse(y == -1, "red", "blue"), each = p)
  long_data$label <- rep(ifelse(y == -1, "iy", "sh"), each = p)
  filtered_data <- long_data
  means <- filtered_data %>%
    group_by(time, label) %>%
    summarize(mean_value = mean(value))
  pii<-ggplot(filtered_data, aes(x = time, y = value, group = matrix)) +
    geom_line(aes(color = label), alpha = 1) + 
    geom_line(data = means, aes(x = time, y = mean_value, group = label), color = "black", size = 1.2) +
    scale_color_manual(values = c("sh" = "blue", "iy" = "red")) +
    labs(x = "t", y = expression(x[ij](t))) +
    ylim(c(-rg, rg)) +
    theme_minimal() +
    theme(panel.border = element_rect(colour = "black", fill=NA, size=1), 
          legend.position = c(0.9, 0.9),
          legend.title = element_blank(),
          legend.key.width = unit(1.5, 'cm'),
          legend.text = element_text(face = "bold"))
  return(pii)
}
pic1(d0,rg=10.5);for(i in 1:length(d11)){plot(pic1(d11[[i]],rg=3.5+3*(i==1)))}

#### Classifiers ----
library(parallel)
library(MASS)
library(fda)
library(e1071)
library(refund)
library(VGAM)

cases<-'bothxy'
edpxy<-c(0.1,0.5,1:5,10)
if(cases=='onlyy'){
  edpy<-edpxy;dy_all<-exp(edpy)/(1+exp(edpy));dx_all<-rep(0,length(edpxy))}else if(cases=='onlyx'){
    edpx<-edpxy;dx_all<-2*nbasis/edpx;dy<-rep(1,length(edpxy))}else{
      edpy<-edpxy/(nbasis+1);dy_all<-exp(edpy)/(1+exp(edpy));dx_all<-2*(nbasis+1)/edpxy}
edpa<-exp(edpxy)/(1+exp(edpxy))

myres<-list()
for(j in 1:length(edpxy)){
  dx<-dx_all[j];dy<-dy_all[j];dai<-edpa[j]
  
  set.seed(7777)
  iseed<-sample(1000:10000,500,replace=F)
  cl<-makeCluster(50,type="FORK")
  myres[[j]]<-parLapply(cl,1:500,function(i){
    tryCatch({
      set.seed(iseed[i])
      
      ## dataset splitting
      inx0<-sample(2035,535);inx1<-sample((1:2035)[-inx0],300);inx2<-(1:2035)[-c(inx0,inx1)]
      N<-length(inx1);n0<-n1<-50;J<-24
      dtest<-list('y'=y[inx0],'x'=da[inx0,])
      dtrain<-list('y'=y[inx1],'x'=da[inx1,])
      davg<-lapply(1:J,function(j){
        inxi<-inx2[(1:(length(inx2)/J))+(j-1)*(length(inx2)/J)]
        return(list('y'=y[inxi],'x'=da[inxi,]))
      });mi=2
      dt1<-dpfun(dtrain,q=dy,sd=dx,nol=mi)
      
      ## build weak classifiers with training set
      models<-lapply(1:J,function(jj){
        inx<-sample(N,n0,replace=F)
        d1<-list('y'=dt1$y[inx],'x'=dt1$x[inx,],'beta'=dt1$beta[inx,])
        res1<-vecfun(list('y'=(d1$y+1)/2,'x'=d1$beta),dtest)
        res2<-dwdfun(d1,dtest);res3<-cgfun(d1,dtest)
        beta<-cbind(res1$beta,res2$beta,res3$beta)
        acc<-c(res1$acc,res2$acc,res3$acc);acc1<-accfun(davg[[jj]],beta,da)
        acc<-cbind('test'=acc,acc1)
        return(list('beta'=beta,'acc'=acc))
      })
      beta<-array(NA,dim=c(dim(models[[1]]$beta),J))
      for(ci in 1:J){beta[,,ci]<-models[[ci]]$beta}
      acc<-array(NA,dim=c(dim(models[[1]]$acc),J))
      for(ci in 1:J){acc[,,ci]<-models[[ci]]$acc}
      
      ## misclassification rate of weak classifiers
      accini<-apply(acc,MARGIN=c(1,2),function(x) mean(x,na.rm=T))
      colnames(accini)<-paste('cj',c('test','avg','err','crc'),sep='_')
      rownames(accini)<-c('logi','svm','dwd',paste0('cg',1:5))
      
      ## misclassification rate of MRMA and MR
      acci<-lapply(1:8,function(k) aggfun_T(acc[k,,],beta[,k,],dtest))
      acc_avg<-abind(acci,along=4)
      acc_avg_T<-acc_avg[1,,,] #MRMA
      acc_avg_mr<-acc_avg[2,,,] #MR
      
      ## misclassification rate of MA
      acci<-lapply(1:8,function(k) aggfun_F(acc[k,,],beta[,k,],dtest))
      acc_avg_F<-abind(acci,along=4)
      dimnames(acc_avg_mr)<-dimnames(acc_avg_T)<-dimnames(acc_avg_F)<-
        list(cutoff,c('test','avg','err','crc'),c('logi','svm','dwd',paste0('cg',1:5)))
      
      ## voting and averaging with equal weight
      davg1<-lapply(1:J,function(i) 
        list('y'=c(davg[[i]]$y,dtrain$y[(1:(N/J))+(i-1)*(N/J)]),
             'x'=rbind(davg[[i]]$x,dtrain$x[(1:(N/J))+(i-1)*(N/J),])))
      dt1<-lapply(davg1,function(dtii) dpfun(dtii,q=dy,dx=dx,nol=mi))
      models<-lapply(dt1,function(dtii){
        res1<-vecfun(list('y'=(dtii$y+1)/2,'x'=dtii$beta),dtest)
        res2<-dwdfun(dtii,dtest);res3<-cgfun(dtii,dtest)
        beta<-cbind(res1$beta,res2$beta,res3$beta)
        return(beta)
      })
      beta<-array(NA,dim=c(dim(models[[1]]),J))
      for(ci in 1:J){beta[,,ci]<-models[[ci]]}
      beta_w<-apply(beta,MARGIN=1:2,mean)
      acc<-apply(beta,MARGIN=2:3,function(coefi)
        as.numeric(((as.vector(coefi[1]+dtest$x%*%coefi[-1]/nti)>0)*2-1)) )
      acc_vote<-apply(acc,MARGIN=2,function(x) mean(sign(rowMeans(x))!=dtest$y))
      acc_w<-acc<-apply(beta_w,MARGIN=2,function(coefi)
        mean(as.numeric(((as.vector(coefi[1]+dtest$x%*%coefi[-1]/nti)>0)*2-1)!=dtest$y)) )
      
      ## classifier with all instances
      dtest<-list('y'=y[inx0],'x'=da[inx0,])
      dall<-list('y'=y[-inx0],'x'=da[-inx0,]);mi=2
      d1<-dpfun(dall,q=1,dx=0,nol=mi)
      acc0<-c(vecfun(list('y'=(d1$y+1)/2,'x'=d1$beta),dtest)$acc,
              dwdfun(d1,dtest)$acc,cgfun(d1,dtest)$acc)
      d1<-dpfun(dall,q=dy,sd=dx,nol=mi)
      acc1<-c(vecfun(list('y'=(d1$y+1)/2,'x'=d1$beta),dtest)$acc,
              dwdfun(d1,dtest)$acc,cgfun(d1,dtest)$acc)
      
      #weak,MA,MRMA,MR,voting,averaging
      return(list(accini,acc_avg_F,acc_avg_T,acc_avg_mr,cbind(acc_vote,acc_w),cbind(acc0,acc1)))
    },error=function(e){cat("ERROR : i = ",i,", ",conditionMessage(e), "\n")})
  })
  stopCluster(cl)
  save.image("real_data.Rdata")
  print(j)
}
save.image("real_data.Rdata")

#### Section 6. Real application ----
miall<-c(1:3,8);mis<-c('Logistic','SVM','DWD','CG')

plotfun<-function(data_lists,mi,list_id_labels,ry=c(0.05,0.95)){
  df <- do.call(rbind, lapply(1:length(data_lists), function(list_idx) {
    data.frame(
      list_id = list_idx,
      point = rep(1:8, unlist(lapply(data_lists[[list_idx]],length))),
      value = unlist(data_lists[[list_idx]])
    )
  }))
  summary_df <- df %>% 
    group_by(list_id, point) %>% 
    summarise(
      mean = mean(value),
      lwr = quantile(value, 0.025),
      upr = quantile(value, 0.975)
    )
  v_x <- edpxy
  summary_df$point_label <- as.factor(v_x[summary_df$point])
  
  line_types <- c(rep("solid",4),rep("longdash",3))
  mycolor<-c("#00BFC4", "#F8766D", "#00BA38", "#FFA500","#007AFF", "#C77CFF","#FF2D55")
  pics_mi <- 
    ggplot(summary_df, aes(x = point_label, y = mean, group = factor(list_id), color = factor(list_id))) +
    geom_line(position = position_dodge(0.2), aes(linetype = factor(list_id))) +
    geom_point(position = position_dodge(0.2)) +
    geom_errorbar(aes(ymin = lwr, ymax = upr), width = 0.2, position = position_dodge(0.2),linetype = "dashed") +
    labs(title = '', x = expression(epsilon), y = "Misclassification rate") +
    scale_color_manual(values = mycolor, labels = list_id_labels,name=NULL) +
    scale_linetype_manual(values = line_types, labels = list_id_labels,name=NULL) +
    scale_y_continuous(
      breaks = c(0.2, 0.4, 0.6, 0.8),
      labels = scales::percent_format(accuracy = 1)(c(0.2, 0.4, 0.6, 0.8))
    ) +
    theme_minimal() +
    coord_cartesian(ylim = ry) +
    theme(
      panel.border = element_rect(color = "black", fill = NA, size = 1),
      axis.text.x = element_text(color = "black"),
      axis.text.y = element_text(color = "black"),
      plot.title = element_text(size = 10),
      legend.position = c(1, 1), 
      legend.justification = c(1, 1),
      legend.key.width = unit(1.5, 'cm')
    ) +
    guides(color = guide_legend(override.aes = list(linetype = line_types))) # ,shape=NA,Combine legends
  
  return(pics_mi)
}
cuti<-2
corr<-T
res_pert<-lapply(miall,function(i){
  
  # weak classifier
  vini<-lapply(myres,function(myi) unlist(lapply(myi,function(myj) myj[[1]][i,3+corr])) )
  # MA
  vpert1<-lapply(myres,function(myi) {
    x=unlist(lapply(myi,function(myj) myj[[2]][cuti,3+corr,i]));return(x[!is.na(x)])} )
  # MRMA
  vpert2<-lapply(myres,function(myi) {
    x=unlist(lapply(myi,function(myj) myj[[3]][cuti,3+corr,i]));return(x[!is.na(x)])} )
  # MR
  vpert3<-lapply(myres,function(myi) {
    x=unlist(lapply(myi,function(myj) myj[[4]][cuti,3+corr,i]));return(x[!is.na(x)])} )
  # voting
  vini1<-lapply(myres,function(myi) unlist(lapply(myi,function(myj) myj[[5]][i,1])) )
  # averaging
  vini2<-lapply(myres,function(myi) unlist(lapply(myi,function(myj) myj[[5]][i,2])) )
  # all_data
  ciall<-lapply(my0,function(myi) unlist(lapply(myi,function(myj) myj[i])))
  
  return(list(vini,vpert3,vpert1,vpert2,vini1,vini2,ciall))
})
list_id_labels<-c('Weak','MR','MA','MRMA','Voting','Averaging','All data')
pics<-lapply(1:4,function(mi) plotfun(res_pert[[mi]],mi,list_id_labels,
                                      ry=c(0,1)))
for(i in 1:4){plot(pics[[i]])}


