


################################################################
################################################################
################################################################
################################################################
################################################################
################################################################
## PREPARING ENVIRONMENT

library(e1071)
library(magrittr)
library(tidyverse)
library(plot.matrix)

## created_data <- TRUE  ------> To load pre-runned experiments.
## created_data <- FALSE ------> Re-run analysis.
created_data <- FALSE

p_init <- 400
ctype <- "cor"
subfolder_plots <- paste0(getwd(),"/p_", p_init, "_", ctype, "/")

if (!dir.exists(subfolder_plots)) {
       dir.create(subfolder_plots)
}

if(created_data) {
  load(paste0("W26_Data2_p", p_init, "_", ctype, "s.RData"))
  
  
} else {
  load("W26_Data1.RData")
  
  source("src/plotting.r")
  source("src/005_SpectralCurvesPlots.R")
  source("src/041_VariableScreening.R")
  source("src/042_SGDsphere.R")
  source("src/043_SGDspheremulti.R")
  source("src/044_SGDstiefel.R")
  source("src/004_ModulePlotting.R")
  source("src/003_UsefulMatrixTransforms.R")
  source("src/log_transform.R")

  df_merged <- rbind(
    LUAD_mat_t, 
    LUSC_mat_t) 

  ## Log-transformation
  df_lt <- df_merged %>% 
    as_tibble() %>%
    select_if(function(x) {var(x) > 100}) %>%
    mutate_if(is_skewed, log_transform)
  dim(df_lt)



  df_lt <- df_lt %>% 
    mutate(cancer_type = rep(c("LUAD", "LUSC"), c(nrow(LUAD_mat_t), nrow(LUSC_mat_t))))

  LUAD_mat_lt <- df_lt %>%
    filter(cancer_type == "LUAD") %>%
    select(-cancer_type)
  LUSC_mat_lt <- df_lt %>%
    filter(cancer_type == "LUSC") %>%
    select(-cancer_type)

  var_luad <- LUAD_mat_lt %>% apply(2, var)
  var_lusc <- LUSC_mat_lt %>% apply(2, var)


  selected_vars <- unique(c(order(var_luad, decreasing = TRUE)[1:p_init],
                            order(var_lusc, decreasing = TRUE)[1:p_init]))
  p <- length(selected_vars)
}

##############################
##############################
## Visualize
png(filename = paste0(subfolder_plots, "p", p_init, "_", ctype,  "_skewness.png"), 
    width = 500, height = 500)
par(mfrow = c(2,2))

skew <- df_merged %>% 
  apply(2, skewness) %>% 
  unlist() %T>%
  {plot(., #sort(.), 
        main = "Skewness of the variables",
        xlab = "Variable index",
        ylab = "Skewness")}
abline(h= c(-1,0,1))



skew <- df_lt %>% 
  select(-cancer_type) %>%
  apply(2, function(x) {skewness(x[!is.na(x)])}) %T>% 
  {plot(., #sort(.), 
        main = "Skewness of the variables",
        xlab = "Variable index",
        ylab = "Skewness",
        ylim = c(-max(abs(.)), max(abs(.))))}
abline(h= c(-1,0,1))

df_merged %>% apply(2, var) %>% 
    plot(main = "Variance: pre LT")
df_lt %>% select(-cancer_type) %>% apply(2, var) %>%  
    plot(main = "Variance: post LT")
dev.off()





############################################################
############################################################
############################################################
############################################################
## IPC-HD on luad

if(!created_data) {
    
    cor_luad <- LUAD_mat_lt[, selected_vars] %>% {get(ctype)(.)}
    eig_luad <- cor_luad %>% eigen()


    ## Measures of connectivity:
    alpha_luad_d <- cor_luad %>% solve() %>% 
        apply(2, function(x) sum(x^2))

    alpha_luad_nd <-cor_luad %>% solve() %>% 
        {. - diag(diag(.))} %>%
        apply(2, function(x) sum(x^2))

    alpha_hubs_luad <- which(alpha_luad_nd > mean(alpha_luad_nd) + 2 * sd(alpha_luad_nd))
    alpha_hubs_luad

}


## Visualization:
png(filename = paste0(subfolder_plots, "p", p_init, "_", ctype,  "_ipchd_luad.png"), width = 500, height = 500)
par(mfrow = c(3,2),
    oma = c(0,0,3,0))
alpha_luad_d %>%
  plot(pch = 19, 
       main = "Alpha-values (D)",
       col = factor(1:p %in% alpha_hubs_luad))
alpha_luad_nd %>%
  plot(pch = 19, 
       main = "Alpha-values (ND)",
       col = factor(1:p %in% alpha_hubs_luad))

plot(1 / eig_luad$values, main = "Eigenvalues")

shat <- 3
im_luad3 <- eig_luad$vectors[, p - (1:shat) + 1]^2 %>% 
  apply(1, sum) %T>%
  plot(col = factor(1:p %in% alpha_hubs_luad), pch = 19,
       main = "luad Smoking: \nInf. Measures with s = 3")
abline(h = mean(im_luad3) + 2* sd(im_luad3))

shat <- 2
im_luad2 <- eig_luad$vectors[, p - (1:shat) + 1]^2 %>% 
  apply(1, sum) %T>%
  plot(col = factor(1:p %in% alpha_hubs_luad), pch = 19,
       main = "luad Smoking: \nInf. Measures with s = 2")
abline(h = mean(im_luad2) + 2* sd(im_luad2))

im_luad1 <- eig_luad$vectors[, p]^2 %T>% 
  plot(col = factor(1:p %in% alpha_hubs_luad), pch = 19,
       main = "luad Smoking: \nInf. Measures with s = 1")
abline(h = mean(im_luad1) + 2* sd(im_luad1))

imhubs_luad <- im_luad2 %>% 
  {which(. > mean(.) + 2 * sd(.))} 

mtext("IPC-HD: LUAD data analysis", 
      outer = TRUE, side = 3,
      cex = 2)
dev.off()



############################################################
############################################################
############################################################
############################################################
## IPC-HD on lusc

if(!created_data) {
    cor_lusc <- LUSC_mat_lt[, selected_vars] %>% {get(ctype)(.)}
    eig_lusc <- cor_lusc %>% eigen()
    p <- ncol(cor_lusc)


    ## Measures of connectivity:
    alpha_lusc_d <- cor_lusc %>% solve() %>% 
        apply(2, function(x) sum(x^2))

    alpha_lusc_nd <-cor_lusc %>% solve() %>% 
        {. - diag(diag(.))} %>%
        apply(2, function(x) sum(x^2))

    alpha_hubs_lusc <- which(alpha_lusc_nd > mean(alpha_lusc_nd) + 2 * sd(alpha_lusc_nd))
    alpha_hubs_lusc

}

## Visualization:
png(filename = paste0(subfolder_plots, "p", p_init, "_", ctype, "_ipchd_lusc.png"), width = 500, height = 500)
par(mfrow = c(3,2),
    oma = c(0,0,3,0))
## Alpha-values
alpha_lusc_d %>%
  plot(pch = 19, 
       main = "Alpha-values (D)",
       col = factor(1:p %in% alpha_hubs_lusc))
alpha_lusc_nd %>%
  plot(pch = 19, 
       main = "Alpha-values (ND)",
       col = factor(1:p %in% alpha_hubs_lusc))

## Eigen-values
plot(1 / eig_lusc$values, main = "Eigenvalues")

## Influence-Measures
shat <- 3
im_lusc3 <- eig_lusc$vectors[, p - (1:shat) + 1]^2 %>% 
  apply(1, sum) %T>%
  plot(col = factor(1:p %in% alpha_hubs_lusc), pch = 19,
       main = "lusc Smoking: \nInf. Measures with s = 3")
abline(h = mean(im_lusc3) + 2* sd(im_lusc3))

shat <- 2
im_lusc2 <- eig_lusc$vectors[, p - (1:shat) + 1]^2 %>% 
  apply(1, sum) %T>%
  plot(col = factor(1:p %in% alpha_hubs_lusc), pch = 19,
       main = "lusc Smoking: \nInf. Measures with s = 2")
abline(h = mean(im_lusc2) + 2* sd(im_lusc2))


im_lusc1 <- eig_lusc$vectors[, p]^2 %T>% 
  plot(col = factor(1:p %in% alpha_hubs_lusc), pch = 19,
       main = "lusc Smoking: \nInf. Measures with s = 1")
abline(h = mean(im_lusc1) + 2* sd(im_lusc1))

imhubs_lusc <- im_lusc2 %>% 
  {which(. > mean(.) + 2 * sd(.))} 
mtext("IPC-HD: LUSC data analysis", 
      outer = TRUE, side = 3,
      cex = 2)
dev.off()
## print(imhubs_lusc)


############################################################
############################################################
############################################################
## JIC-HD


if(!created_data) {

    cor_luad <- cor_luad / eig_luad$values[p]
    cor_lusc <- cor_lusc / eig_lusc$values[p]
    corlist <- list(cor_luad, cor_lusc)
    p <- ncol(cor_luad)

    eigen(cor_luad)$values[p]
    eigen(cor_lusc)$values[p]

    set.seed(2026)
    
    ##############################
    ## JIC-ESTIMATION: VARYING DIMENSION
    ## S = 1
    shat <- 2
    max.iter <- 10000
    nstarts <- 5
    cor_StOrac_obj1 <- sgd.stiefel(
        sigmalist = corlist,   ## Find common eigenvectors.
        p = p, ndir = 1, K = 2,
        type = "M", nstarts = nstarts,
        alpha = 4e-5, max.iter = max.iter)
    
    ##############################
    ## S = 2
    max.iter <- 10000
    nstarts <- 2
    cor_StOrac_obj2 <- sgd.stiefel(
        sigmalist = corlist,   ## Find common eigenvectors.
        p = p, ndir = 2, K = 2,
        type = "M", nstarts = nstarts,
        alpha = 3.75e-5, max.iter = max.iter)
    
    ##############################
    ## S = 3
    max.iter <- 10000
    nstarts <- 2
    cor_StOrac_obj3 <- sgd.stiefel(
        sigmalist = corlist,   ## Find common eigenvectors.
        p = p, ndir = 3, K = 2,
        type = "M", nstarts = nstarts,
        alpha = 3e-5, max.iter = max.iter)
    
    ##############################
    ## S = 4
    max.iter <- 10000
    nstarts <- 2
    cor_StOrac_obj4 <- sgd.stiefel(
        sigmalist = corlist,   ## Find common eigenvectors.
        p = p, ndir = 4, K = 2,
        type = "M", nstarts = nstarts,
        alpha = 3e-5, max.iter = max.iter)

}

############################################################
############################################################
## VISUALIZE OBJECTIVE FUNCTION UPDATES TO VERIFY CONVERGENCE
## s = 1
library(tidyverse)
png(filename = paste0(subfolder_plots, "p", p_init, "_", ctype, "_jichd1.png"), width = 500, height = 500)
M <- t(cor_StOrac_obj1$opt_seq) %>% as.data.frame()
colnames(M) <- paste0("seq", 1:5)
M %>% as_tibble() %>%
  dplyr::mutate(Time = 1:max.iter) %>%
  pivot_longer(cols = seq1:seq5,
               names_to = "curve_no",
               names_prefix = "seq",
               values_to = "fval") %>%
  mutate(curve_no = factor(curve_no)) %>%
  ggplot2::ggplot(aes(x = Time, y = log(fval, 10))) +
  #ggplot2::ggplot(aes(x = Time, y = fval)) +
  geom_line(aes(color=curve_no))
dev.off()
## 5.5

## s = 2
png(filename = paste0(subfolder_plots, "p", p_init, "_", ctype,  "_jichd2.png"), width = 500, height = 500)
M <- t(cor_StOrac_obj2$opt_seq) %>% as.data.frame()
colnames(M) <- paste0("seq", 1:2)
M %>% as_tibble() %>%
  dplyr::mutate(Time = 1:max.iter) %>%
  pivot_longer(cols = seq1:seq2,
               names_to = "curve_no",
               names_prefix = "seq",
               values_to = "fval") %>%
  mutate(curve_no = factor(curve_no)) %>%
  ggplot2::ggplot(aes(x = Time, y = log(fval, 10))) +
  #ggplot2::ggplot(aes(x = Time, y = fval)) +
  geom_line(aes(color=curve_no))
dev.off()
## 5.75

## s = 3
png(filename = paste0(subfolder_plots, "p", p_init, "_", ctype,  "_jichd3.png"), width = 500, height = 500)
M <- t(cor_StOrac_obj3$opt_seq) %>% as.data.frame()
colnames(M) <- paste0("seq", 1:2)
M %>% as_tibble() %>%
  dplyr::mutate(Time = 1:max.iter) %>%
  pivot_longer(cols = seq1:seq2,
               names_to = "curve_no",
               names_prefix = "seq",
               values_to = "fval") %>%
  mutate(curve_no = factor(curve_no)) %>%
  ggplot2::ggplot(aes(x = Time, y = log(fval, 10))) +
  #ggplot2::ggplot(aes(x = Time, y = fval)) +
  geom_line(aes(color=curve_no))
dev.off()
## 5.75

## s = 4
png(filename = paste0(subfolder_plots, "p", p_init, "_", ctype,  "_jichd4.png"), width = 500, height = 500)
M <- t(cor_StOrac_obj4$opt_seq) %>% as.data.frame()
colnames(M) <- paste0("seq", 1:2)
M %>% as_tibble() %>%
  dplyr::mutate(Time = 1:max.iter) %>%
  pivot_longer(cols = seq1:seq2,
               names_to = "curve_no",
               names_prefix = "seq",
               values_to = "fval") %>%
  mutate(curve_no = factor(curve_no)) %>%
  ggplot2::ggplot(aes(x = Time, y = log(fval, 10))) +
  #ggplot2::ggplot(aes(x = Time, y = fval)) +
  geom_line(aes(color=curve_no))
dev.off()
## 5.75




############################################################
############################################################
## Preliminary visualization of hub detection:

png(filename = paste0(subfolder_plots, "p", p_init, "_", ctype,  "_joint_plot.png"), width = 600, height = 400)
par(mfrow = c(2,2),
    oma = c(0,0,3,0))
imhubs_shared <- intersect(imhubs_lusc, imhubs_luad)
col <- ifelse(1:p %in% imhubs_shared, cbPalette[2],
              ifelse(1:p %in% imhubs_luad, cbPalette[3],
                     ifelse(1:p %in% imhubs_lusc, cbPalette[4], cbPalette[1])))

im_joint4 <- cor_StOrac_obj4$vectors^2 %>%
  apply(1, sum) %T>%
  plot(pch = 19,
       col = col,
       main = "JIC-HD: S = 4")
abline(h = mean(im_joint4) + 2* sd(im_joint4))
imhubs_joint4 <- im_joint4 %>%
  {which(. > mean(.) + 2 * sd(.))} %T>%
  print()

im_joint3 <- cor_StOrac_obj3$vectors^2 %>%
  apply(1, sum) %T>%
  plot(pch = 19,
       col = col,
       main = "JIC-HD: S = 3")
abline(h = mean(im_joint3) + 2* sd(im_joint3))
imhubs_joint3 <- im_joint3 %>%
  {which(. > mean(.) + 2 * sd(.))} %T>%
  print()

im_joint2 <- cor_StOrac_obj2$vectors^2 %>%
  apply(1, sum) %T>%
  plot(pch = 19,
       col = col,
       main = "JIC-HD: S = 2")
abline(h = mean(im_joint2) + 2* sd(im_joint2))

imhubs_joint2 <- im_joint2 %>%
  {which(. > mean(.) + 2 * sd(.))} %T>%
  print()


shat <- 1
im_joint1 <- cor_StOrac_obj1$vectors^2 %T>%
  plot(pch = 19,
       col = col,
       main = "JIC-HD: S = 1")
abline(h = mean(im_joint1) + 2 * sd(im_joint1))
imhubs_joint1 <- im_joint1 %>%
  {which(. > mean(.) + 2 * sd(.))} %T>%
  print()

mtext("JIC-HD: JOINT HUB DETECTION", 
      outer = TRUE, side = 3,
      cex = 2)
dev.off()







############################################################
############################################################
############################################################
############################################################
############################################################
############################################################
## S-Dimension estimation via eigengap analysis:

if (!created_data) {
  cor_StOrac_obj <- list(cor_StOrac_obj1)
  V_val <- rep(min(cor_StOrac_obj1$opt_seq), 10)
  
  for (shat in 2:10) {
    
    max.iter <- 10000
    nstarts <- 2
    print(shat)
    
    set.seed(2026)
    max.iter <- 10000
    nstarts <- 2
    cor_StOrac_obj[[shat]] <- sgd.stiefel(
      sigmalist = corlist,   ## Find common eigenvectors.
      p = p, ndir = shat, K = 2,
      type = "M", nstarts = nstarts,
      alpha = 3e-5, max.iter = max.iter)
    
    V_val[shat] <- min(cor_StOrac_obj[[shat]]$opt_seq) - sum(V_val[1:(shat - 1)])
  }
  
  vs <- lapply(cor_StOrac_obj, function(x) {min(x$opt_seq)}) %>% unlist()
  
}



############################################################
############################################################
## Code for Figure S.2
png(filename = paste0(subfolder_plots, "p", p_init, "_", ctype,  "_JICHD_eigengaps.png"), width = 600, height = 300)
par(mfrow = c(1,2))
vvals <- rep(vs[1], 10)
for (i in 2:10) {
  vvals[i] <- vs[i] - sum(vs[i - 1])
}
#plot(vs, pch = 19)
plot(
  1 / vvals, pch = 19, 
  main = "Estimated Eigenvalues", type = "b",
  ylab = "Estimated Joint Eigenvalues",
  xlab = "Eigenvalue Index")
plot(
  vvals[2:10] / vvals[1:9], pch = 19, 
  main = "Estimated Delta-ratios", type = "b",
  ylab = "Delta-ratio values",
  xlab = "Delta-ratio Index")
dev.off()
length(cor_StOrac_obj)




############################################################
############################################################
## Preparing environment to save:

ls(all.names = TRUE)


if(!created_data) {
  created_data <- TRUE
  save.image(paste0("W26_Data2_p", p_init, "_", ctype, "s.RData"))
}
rm(list = ls())




