
taskID <- as.integer(Sys.getenv("SLURM_ARRAY_TASK_ID"))
require(Matrix)


#save simulation settings by array number
Np = c(5, 10, 15)
N = c(1000,5000,10000)

sim_grid <- expand.grid(Np, N)

pop_params <- sim_grid[as.integer(taskID/100)+1,]

epsilon = c(0.3,0.6, 0.9)
prop = c(0.25,0.5,0.75)

sim_grid <- expand.grid(epsilon, prop)

block_params <- sim_grid[as.integer(taskID/100)+1,]


mle_lambda <- function(rds_network, current_sample,seeds, gamma_prior = NULL){
  #calculate the mle lambda given the current network proposal
  if( is.null(gamma_prior)){
    gamma_prior = c(0,0)
  }
  n <- dim(current_sample)[1]
  components <- Likelihood_components(rds_network = rds_network, proposal = current_sample)
  mle_lambda <- (n - length(seeds) + gamma_prior[1])/(t(components$s) %*% components$w + gamma_prior[2])
  return(mle_lambda)
}

Likelihood_components <- function(rds_network, proposal){
  #Likelihood components
  
  w <- rds_network$times
  u <- rds_network$degrees[rds_network$recruited] - rowSums(proposal)
  #let's start constructing s
  coupon_matrix <- rds_network$final_coupon_matrix
  AC <- proposal %*% coupon_matrix
  lt_AC <- AC * as.numeric(lower.tri(AC, diag = T))
  s <- rowSums(t(lt_AC)) + t(coupon_matrix) %*% u 
  return(list(w = w, u = u, s = s, lt_AC = lt_AC))
}


log_likelihood <- function(rds_network, current_sample, lambda, seeds){
  #log likelihood
  
  components <- Likelihood_components(rds_network, current_sample)
  
  n <- dim(current_sample)[1]
  
  log_likelihood <- -(lambda*(t(components$s) %*% components$w) - 
                        sum(log(rep(lambda, length(components$s)- length(seeds))* 
                                  components$s[-(1:length(seeds))]
                        )
                      )
  )
  
  return(log_likelihood)
  
}


log_likelihood_lambda_Gs <- function(rds_network, current_sample, lambda, seeds){
  #log-likelihood of the RDS process
  
  components <- Likelihood_components(rds_network, current_sample)
  
  n <- dim(current_sample)[1]
  
  log_likelihood <- -(lambda*(t(components$s) %*% components$w) - 
                        sum(log(rep(lambda, length(components$s)- length(seeds) )* 
                                  components$s[-(1:length(seeds))]
                        )
                        )
  )
  

  return(log_likelihood)
  
}


new_proposal_flip<- function(rds_network, current_sample){
  #New proposal for hill-climbing to find MLE
  
  #degree resources
  u <- rds_network$degrees[rds_network$recruited] - rowSums(current_sample)
  #underlying rds network
  base <- rds_network$network_recruited
  #number of nodes
  size <- dim(current_sample)[1]
  #indicate that the loop has found a new proposal
  found <- F
  #when finding a new sample, start at old one
  new_sample <- current_sample
  #start the loop
  while(!found){
    #select random nodes
    if(runif(1) < 0.5){
      difference <- current_sample - base
      subtractable_vertices<- which(difference == 1, arr.ind = T)
      num <- dim(subtractable_vertices)[1]
      if(num == 0){
        return(list(new_sample = current_sample, 0))
      }
      random <- sample(1:num, 1)
      random_vertices <- subtractable_vertices[random,]
      random_vertices <- sort(random_vertices)
      if(current_sample[random_vertices[1], random_vertices[2]] == 1 & 
         base[random_vertices[1], random_vertices[2]] == 0){
        #subtract the edge
        new_sample[random_vertices[1], random_vertices[2]]<- 0
        new_sample[random_vertices[2], random_vertices[1]] <- 0
        found <- T
        direction <- "subtract"
      } else{
        print("error")
      }
    } else{
      u_ind <- as.numeric(u>0)
      add_matrix <-(1-current_sample) * (u_ind %*% t(u_ind)) * (1- diag(1,size, size))
      add_vertices<- which(add_matrix == 1, arr.ind = T)
      num <- dim(add_vertices)[1]
      if(num == 0){
        return(list(new_sample = current_sample, 0))
      }
      random <- sample(1:num, 1)
      random_vertices <- add_vertices[random,]
      random_vertices <- sort(random_vertices)
      if (sum(u[random_vertices] > 0) ==2 & current_sample[random_vertices[1], random_vertices[2]] == 0){
        #add the edge
        new_sample[random_vertices[1], random_vertices[2]] <- 1
        new_sample[random_vertices[2], random_vertices[1]] <- 1
        found <- T
        direction <- "add"
      } else{
        print("error")
      }
    }
  }
  return(list(new_sample = new_sample,direction = direction, random_vertices = random_vertices ))
  
}


#alternative proposal mechanism
new_proposal<- function(rds_network, current_sample){
  #New proposal for hill-climbing to find MLE
  
  #degree resources
  u <- rds_network$degrees[rds_network$recruited] - rowSums(current_sample)
  #underlying rds network
  base <- rds_network$network_recruited
  #number of nodes
  size <- dim(current_sample)[1]
  #indicate that the loop has found a new proposal
  found <- F
  #when finding a new sample, start at old one
  new_sample <- current_sample
  #start the loop
  while(!found){
    #select random nodes
    random_vertices<- sample(x = 1:size, size = 2, replace = FALSE)
    random_vertices <- sort(random_vertices)
    #if the degree of the two vertices is sufficient and the edge does not exist, then:
    if (sum(u[random_vertices] > 0) ==2 & current_sample[random_vertices[1], random_vertices[2]] == 0){
      #add the edge
      new_sample[random_vertices[1], random_vertices[2]] <- 1
      new_sample[random_vertices[2], random_vertices[1]] <- 1
      found <- T
      direction <- "add"
    }
    #if the edge between the two random nodes is present, but is absent from baseline RDS
    if(current_sample[random_vertices[1], random_vertices[2]] == 1 & 
       base[random_vertices[1], random_vertices[2]] == 0){
      #subtract the edge
      new_sample[random_vertices[1], random_vertices[2]]<- 0
      new_sample[random_vertices[2], random_vertices[1]] <- 0
      found <- T
      direction <- "subtract"
    }
  }
  return(list(new_sample = new_sample,direction = direction, random_vertices = random_vertices ))
  
}

beta_mle <- function(Y, X_matrix){
  #linear regression beta mle
  beta <- solve(t(X_matrix) %*% X_matrix) %*% t(X_matrix) %*% Y
  return(beta)
}

sigma_mle <- function(Y, beta, X_matrix){
  #linear regression sigma mle
  sigma <- mean((Y-X_matrix %*% beta)^2)
  return(sigma)
}


log_likelihood_Beta_sigma <- function(Y, X_matrix, beta, sigma){
  #linear regression likelihood
  predictions <- X_matrix %*% solve(t(X_matrix) %*% X_matrix) %*% t(X_matrix) %*% Y 
  SSE <- sum(  (  (Y - predictions)/sigma  )^2  )
  log_normal_like <- -(n/2)*log(2*pi)- n*log(sigma)- 0.5*SSE 
  return(log_normal_like)
}

total_log_likelihood<- function(rds, Y, regress_vec, Gs,lambda, seeds){
  #combine RDS process and regression likelihood
  regress_like <- log_likelihood_Beta_sigma(Y, regress_vec$X_matrix, regress_vec$beta, regress_vec$sigma)
  craw_like <- log_likelihood_lambda_Gs(rds, Gs, lambda, seeds)
  total_log_like <- regress_like + craw_like 
  return(total_log_like)
}

regression_mle <- function(Y, Gs){
  #generate MLE for regression
  X_matrix <- X_matrix_fun(Gs)
  beta <- beta_mle(Y, X_matrix)
  sigma <- sigma_mle(Y, beta, X_matrix)
  
  return(list(X_matrix = X_matrix, beta = beta, sigma = sigma))
}

X_matrix_fun <- function(Gs){
  #generate components for regression
  
  n <- dim(Gs)[1]
  d_o <- numeric(n)
  for ( i in 1:n ){
    d_o[i] <- sum(Gs[i, 1:(i-1)])
  }
  
  X_matrix <- cbind(1, d_o)
  return(X_matrix)
}




regularized_like_N <- function(alpha, beta, c, N, G_s, degrees, n){
  #generate the regularized mle for N
  
  d_u <- numeric(n)
  
  for ( i in 1:n ){
    d_u[i] <- degrees[i] - sum(G_s[i, 1:(i-1)])
  }
  
  first_comp <- sum(lchoose(N-1:n,d_u)) 
  second_comp <- lbeta(sum(d_u) + alpha, 
                       n*N - choose(n+1, 2) - sum(d_u) + beta)
  third_comp <- -c*log(N)
  
  return(first_comp + second_comp + third_comp)
}

regularized_mle_N <- function(alpha, beta, c, G_s, degrees, n){
  #maximize the regularized likelihood
  
  N <- n*2
  higher_like <- regularized_like_N(alpha, beta, c, N+5, G_s, degrees, n)
  lower_like <- regularized_like_N(alpha, beta, c, N-5, G_s, degrees, n)
  higher_search <- higher_like > lower_like
  if(higher_search){
    N <- N+5
  }
  else{
    N <- N-5
  }
  go <- T
  i <- 0 
  while (go){
    if(higher_search){
      new_higher_like <- regularized_like_N(alpha, beta, c, N+5, G_s, degrees, n)
      if (new_higher_like > higher_like){
        N <- N+5
        higher_like <- new_higher_like
      }
      else{
        return(N+5)
      }
    }
    else{
      new_lower_like <- regularized_like_N(alpha, beta, c, N-10, G_s, degrees, n)
      if (new_lower_like > lower_like){
        N <- N-5
        lower_like <- new_lower_like
      }
      else{
        return(N-5)
      }
    }
  }
  
  i <- i +1 
  
  if (i > 5000){
    print("exceeded")
    return(N)
  }
}



block_model <- F #set to produce Simulation 1
rds_results <- readRDS(paste0('rds_', taskID, '.rds'))
rds <- rds_results$rds

if(block_model){
  N <- 5000
  prob <- in_prob 
  epsilon <- as.double(block_params[1])
  prop <- as.double(block_params[2])
  in_prob <- rds_results$in_prob
  
}else{
  epsilon <- NA
  prop <- NA
  N<- as.double(pop_params[2])
  prob <- as.double(pop_params[1]/pop_params[2])
  
}


seeds <- rds$seeds
components <- Likelihood_components(rds, rds$true_network)
s_vector <- components$s
print("start")
times <- rexp(length(s_vector)-length(seeds), as.vector(s_vector)[-(1:length(seeds))])
rds$times <- c(rep(0, length(seeds)),times)
print(sum(rds$true_network))

#MLE OF LAMBDA OF TRUE GRAPH
best_mle_know <-as.numeric(mle_lambda(rds, rds$true_network, seeds))

best_likelihood_know <- log_likelihood(rds, rds$true_network, best_mle_know, seeds)

print(best_mle_know)


#generate regression data
true_graph <- rds$true_network

n <- dim(true_graph)[1]

true_beta <- c(0,1)

sigma <- 1

X_matrix <- X_matrix_fun(true_graph)

Y <- X_matrix %*% true_beta + rnorm(n, 0, sigma)





#initialize parameters
current_sample <- rds$network_recruited
mle <- mle_lambda(rds, rds$network_recruited, seeds)
current_log_likelihood <- -Inf

for(i in 1:5000){
  #propose new graph - equally likely to propose subtraction and addition
  proposal <- new_proposal_flip(rds, current_sample)
  proposal <- proposal$new_sample
  
  #find lambda MLE and determine likelihood at the MLE
  proposal_lambda_mle <- mle_lambda(rds, proposal, seeds)
  proposal_regress <- regression_mle(Y, proposal)
  proposal_likelihood <- total_log_likelihood(rds, Y, proposal_regress, proposal, proposal_lambda_mle, seeds)
  
  
  if(as.matrix(proposal_likelihood)> as.matrix(current_log_likelihood)){
    
    #climb if proposal is better than current parameters
    current_log_likelihood <- proposal_likelihood
    current_sample <- proposal
    mle <- proposal_lambda_mle
    current_regress <- proposal_regress
  }
}

given_mle <- as.numeric(mle)
given_edges <- as.numeric(sum(current_sample))
original_G_S_est <- current_sample
given_s <- as.vector(Likelihood_components(rds, original_G_S_est)$s)

print("given_mle")
print(given_mle)
print("given_edges")
print(given_edges)


true_times <- rds$times


lambda_seq<- seq(given_mle - 0.4 , given_mle + 0.4 , by = 0.1)
bias_vec <- numeric(length(lambda_seq))
mle_vec <- numeric(length(lambda_seq))

for(j in 1:length(lambda_seq)){
  
  #generate expected value of lambda MLE with set lambda^k
  static_lambda <- lambda_seq[j]
  
  q <- 25
  mle_bias_vec <- numeric(q)
  

  for(k in 1:q){
   
    #generate most likely subgraph with set lambda^k
    rds$times <- true_times

    #initialize parameters
    current_sample <- rds$network_recruited
    current_log_likelihood <- -Inf
    
    for(i in 1:5000){
      
      #propose new graph - equally likely to propose subtraction and addition
      proposal <- new_proposal_flip(rds, current_sample)
      proposal <- proposal$new_sample
      
      #find lambda mle and determine likelihood at the mle
      proposal_regress <- regression_mle(Y, proposal)
      proposal_likelihood <- total_log_likelihood(rds, Y, proposal_regress, proposal, static_lambda, seeds)
      
      
      if(as.matrix(proposal_likelihood)> as.matrix(current_log_likelihood)){
        
        #climb is proposal is better than current parameters
        current_log_likelihood <- proposal_likelihood
        current_sample <- proposal
        
      }
    }
    
    given_G_S <- current_sample
    print(sum(given_G_S))
    
    s_vector <- Likelihood_components(rds, given_G_S)$s
    times <- rexp(length(s_vector)-length(seeds), as.vector(s_vector)[-(1:length(seeds))])
    rds$times <- c(rep(0, length(seeds)),times)
    
    #estimate bias based on this lambda, G_S pairing
    
    
    #initialize parameters
    current_sample <- rds$network_recruited
    mle <- mle_lambda(rds, rds$network_recruited, seeds)
    current_log_likelihood <- -Inf
    
    for(i in 1:5000){
      #propose new graph - equally likely to propose subtraction and addition
      proposal <- new_proposal_flip(rds, current_sample)
      proposal <- proposal$new_sample
      
      #find lambda mle and determine likelihood at the mle
      proposal_lambda_mle <- mle_lambda(rds, proposal, seeds)
      proposal_regress <- regression_mle(Y, proposal)
      proposal_likelihood <- total_log_likelihood(rds, Y, proposal_regress, proposal, proposal_lambda_mle, seeds)
      
      
      if(as.matrix(proposal_likelihood)> as.matrix(current_log_likelihood)){
        
        #climb if proposal is better than current parameters
        current_log_likelihood <- proposal_likelihood
        current_sample <- proposal
        mle <- proposal_lambda_mle
        current_regress <- proposal_regress
      }
    }
    
    
    mle_bias_vec[k] <- mle
    print(mle)
    rds$times <- true_times
  }
  mle <- mean(mle_bias_vec)
  print(static_lambda)
  print(mle)
  #calculate the bias
  bias <- mle - static_lambda
  bias_vec[j] <- as.numeric(bias)
  mle_vec[j] <- as.numeric(mle)
}

distances <- abs(mle_vec - given_mle)

true_lambda_est <- lambda_seq[which(min(distances) == distances)]


#final estimate of G_S based on the IIE of lambda

#initialize parameters
current_sample <- rds$network_recruited
mle <- mle_lambda(rds, rds$network_recruited, seeds)
current_log_likelihood <- -Inf

for(i in 1:7000){
  #propose new graph - equally likely to propose subtraction and addition
  proposal <- new_proposal_flip(rds, current_sample)
  proposal <- proposal$new_sample
  
  #find lambda mle and determine likelihood at the mle
  proposal_regress <- regression_mle(Y, proposal)
  proposal_likelihood <- total_log_likelihood(rds, Y, proposal_regress, proposal,
                                              true_lambda_est, seeds)

  if(as.matrix(proposal_likelihood)> as.matrix(current_log_likelihood)){
    
    #climb is proposal is better than current parameters
    current_log_likelihood <- proposal_likelihood
    current_sample <- proposal
    current_regress <- proposal_regress
  } 
}

true_G_S_est <- current_sample
true_edges_est <- as.numeric(sum(true_G_S_est))
regress_better <- current_regress

print(true_edges_est)


#accuracy metrics 

true_graph <- rds$true_network

rec_num <- dim(true_graph)[1]


#accuracy metrics for first estimate original_G_S_est 
estimated_subgraph <- original_G_S_est
Accuracy_O <- (sum(true_graph == estimated_subgraph)- rec_num)/( rec_num^2 )
TPR_O <- sum((true_graph == estimated_subgraph) * true_graph)/(sum(estimated_subgraph))
TNR_O <- (sum((true_graph == estimated_subgraph)*(1-true_graph)) - rec_num)/
  (sum(1-estimated_subgraph) - rec_num)




#accuracy metrics for real estimate true_G_S_est
estimated_subgraph <- true_G_S_est
Accuracy_T <- (sum(true_graph == estimated_subgraph)- rec_num)/( rec_num^2 )
TPR_T <- sum((true_graph == estimated_subgraph) * true_graph)/(sum(estimated_subgraph))
TNR_T <- (sum((true_graph == estimated_subgraph)*(1-true_graph)) - rec_num)/
  (sum(1-estimated_subgraph) - rec_num)




p <- prob
rds$times <- true_times

#regularization settings
alpha_vec <- c(0.01,0.1, 1,  10, 100, 500, 1000)
beta_vec <- alpha_vec*(1-prob)/prob
alpha_vec <- c(1, alpha_vec)
beta_vec <- c(1, beta_vec)
c <- 0

#estimate population size
print(given_edges)
degrees <- rds$degrees[rds$recruited]
true_graph <- rds$true_network
n <- as.numeric(dim(true_graph)[1])

results <-  matrix(nrow = 0, ncol = 25)

for (i in 1:length(alpha_vec)){
  
  #establish regularization
  alpha <- alpha_vec[i]
  beta <- beta_vec[i]
  
  #the regularized mle given the true subgraph
  full_true_est_N <- regularized_mle_N(alpha, beta, c, true_graph, degrees, n)
  
  
  #the regularized mle given the MLE subgraph
  full_Craw_est_N <- regularized_mle_N(alpha, beta, c, original_G_S_est, degrees, n)
  
  #the regularized mle given the IIE subgraph
  full_better_est_N <- regularized_mle_N(alpha, beta, c, true_G_S_est, degrees, n)
  
  true_edges <- as.numeric(sum(rds$true_network))
  
  
  
  
  results <- rbind(results, c(prop, epsilon, N, prob*N, best_mle_know, true_lambda_est, given_mle,
                              prob_T,
                              true_edges, true_edges_est, given_edges,
                              Accuracy_T, Accuracy_O,
                              TPR_T, TPR_O,
                              TNR_T, TNR_O,
                              alpha, beta,
                              full_true_est_N, (full_true_est_N - N)^2,
                              full_Craw_est_N, (full_Craw_est_N - N)^2, 
                              full_better_est_N, (full_better_est_N - N)^2
  ))
}

results_frame <- data.frame(results)


colnames(results_frame) <- c("sb_prop", "epsilon", "N", "Np", "best MLE Gs Known", "Alt Lambda Est",
                             "Crawford Lambda Est",
                             "True Prob",
                             "Es Size", "Alt Es Size", "Crawford Es Size",
                             "Alt Accuracy", "Crawford Accuracy",
                             "Alt TPR", "Crawford TPR",
                             "Alt TNR", "Crawford TNR",
                             "alpha", "beta",
                             "Full Perf N Est", "Full Perf N MSE",
                             "Full Crawford N est", "Full Crawford N MSE",
                             "Full Alt N Est", "Full Alt N MSE")




saveRDS(results, paste0('RegbiasLambda_', taskID, '.rds')) 

