
taskID <- as.integer(Sys.getenv("SLURM_ARRAY_TASK_ID"))

require(Matrix)


mle_lambda <- function(rds_network, current_sample, seeds, gamma_prior = NULL){
  #calculate the MLE lambda given the current network proposal
  if( is.null(gamma_prior)){
    gamma_prior = c(0,0)
  }
  n <- dim(current_sample)[1]
  components <- Likelihood_components(rds_network = rds_network, proposal = current_sample)
  mle_lambda <- (n - length(seeds) + gamma_prior[1])/(t(components$s) %*% components$w + gamma_prior[2])
  return(mle_lambda)
}

Likelihood_components <- function(rds_network, proposal){
  #likelihood components
  
  w <- rds_network$times
  u <- rds_network$degrees - rowSums(proposal)s
  #let's start constructing s
  coupon_matrix <- rds_network$final_coupon_matrix
  AC <- proposal %*% coupon_matrix
  lt_AC <- AC * as.numeric(lower.tri(AC, diag = T))
  s <- rowSums(t(lt_AC)) + t(coupon_matrix) %*% u 
  return(list(w = w, u = u, s = s, lt_AC = lt_AC))
}



log_likelihood <- function(rds_network, current_sample, lambda, seeds){
  #log likelihood
  
  components <- Likelihood_components(rds_network, current_sample)
  
  n <- dim(current_sample)[1]
  
  log_likelihood <- -(lambda*(t(components$s) %*% components$w) - 
                        sum(log(rep(lambda, length(components$s)-length(seeds) )* 
                                  components$s[-seeds]
                        )
                      )
  )
  
  return(log_likelihood)
  
}



new_proposal_flip<- function(rds_network, current_sample){
  #new proposal network for hill-climbing to find MLE
  
  #degree resources
  u <- rds_network$degrees - rowSums(current_sample) #[rds_network$recruited]
  #underlying rds network
  base <- rds_network$network_recruited
  #number of nodes
  size <- dim(current_sample)[1]
  #indicate that the loop has found a new proposal
  found <- F
  #when finding a new sample, start at old one
  new_sample <- current_sample
  #start the loop
  while(!found){
    #select random nodes
    if(runif(1) < 0.5){
      difference <- current_sample - base
      subtractable_vertices<- which(difference == 1, arr.ind = T)
      num <- dim(subtractable_vertices)[1]
      if(num == 0){
        return(list(new_sample = current_sample, 0))
      }
      random <- sample(1:num, 1)
      random_vertices <- subtractable_vertices[random,]
      random_vertices <- sort(random_vertices)
      if(current_sample[random_vertices[1], random_vertices[2]] == 1 & 
         base[random_vertices[1], random_vertices[2]] == 0){
        #subtract the edge
        new_sample[random_vertices[1], random_vertices[2]]<- 0
        new_sample[random_vertices[2], random_vertices[1]] <- 0
        found <- T
        direction <- "subtract"
      } else{
        print("error")
      }
      
    } else{
      u_ind <- as.numeric(u>0)
      add_matrix <-(1-current_sample) * (u_ind %*% t(u_ind)) * (1- diag(1,size, size))
      add_vertices<- which(add_matrix == 1, arr.ind = T)
      num <- dim(add_vertices)[1]
      if(num == 0){
        return(list(new_sample = current_sample, 0))
        print("limit")
      }
      random <- sample(1:num, 1)
      random_vertices <- add_vertices[random,]
      random_vertices <- sort(random_vertices)
      if (sum(u[random_vertices] > 0) ==2 & current_sample[random_vertices[1], random_vertices[2]] == 0){
        #add the edge
        new_sample[random_vertices[1], random_vertices[2]] <- 1
        new_sample[random_vertices[2], random_vertices[1]] <- 1
        found <- T
        direction <- "add"
      } else{
        print("error")
      }
    }

  }

    return(list(new_sample = new_sample,direction = direction, random_vertices = random_vertices ))
  
}


regularized_like_N <- function(alpha, beta, c, N, G_s, degrees, n){
  #generate the regularized mle for N
  
  d_u <- numeric(n)
  
  for ( i in 1:n ){
    d_u[i] <- degrees[i] - sum(G_s[i, 1:(i-1)])
  }
  
  first_comp <- sum(lchoose(N-1:n,d_u)) 
  second_comp <- lbeta(sum(d_u) + alpha, 
                       n*N - choose(n+1, 2) - sum(d_u) + beta)
  third_comp <- -c*log(N)
  
  return(first_comp + second_comp + third_comp)
}

regularized_mle_N <- function(alpha, beta, c, G_s, degrees, n){
  #maximize the regularized likelihood
  
  N <- n*2
  higher_like <- regularized_like_N(alpha, beta, c, N+5, G_s, degrees, n)
  lower_like <- regularized_like_N(alpha, beta, c, N-5, G_s, degrees, n)
  higher_search <- higher_like > lower_like
  if(higher_search){
    N <- N+5
  }
  else{
    N <- N-5
  }
  go <- T
  i <- 0 
  while (go){
    if(higher_search){
      new_higher_like <- regularized_like_N(alpha, beta, c, N+5, G_s, degrees, n)
      if (new_higher_like > higher_like){
        N <- N+5
        higher_like <- new_higher_like
      }
      else{
        return(N+5)
      }
    }
    else{
      new_lower_like <- regularized_like_N(alpha, beta, c, N-10, G_s, degrees, n)
      if (new_lower_like > lower_like){
        N <- N-5
        lower_like <- new_lower_like
      }
      else{
        return(N-5)
      }
    }
  }
  
  i <- i +1 
  
  if (i > 5000){
    print("exceeded")
    return(N)
  }
}




rds <- readRDS("PWID_rds_data.rds")
true_network <- readRDS("App_True_Graph.rds")
true_lambda <- 0.23

#generate times based on true graph
s_vector <- Likelihood_components(rds, true_network)$s
times <- rexp(length(s_vector)-1, as.vector(s_vector)[-1]*true_lambda)
rds$times <- c(0,times)

seeds <- rds$seeds

#initialize parameters
current_sample <- rds$network_recruited
mle <- mle_lambda(rds, current_sample, seeds)
current_log_likelihood <- log_likelihood(rds, current_sample, mle, seeds)

time <- Sys.time()
iterations_large <- 15000
iterations_small <- 5000

for(i in 1:iterations_large){
  #propose new graph - equally likely to propose subtraction and addition
  proposal <- new_proposal_flip(rds, current_sample)
  proposal <- proposal$new_sample
  
  #find lambda mle and determine likelihood at the mle
  proposal_mle <- mle_lambda(rds, proposal, seeds)
  proposal_likelihood <- log_likelihood(rds, proposal, proposal_mle, seeds)
  
  
  if(as.matrix(proposal_likelihood)> as.matrix(current_log_likelihood)){
    
    print(as.matrix(proposal_likelihood - as.matrix(current_log_likelihood)))
    
    #climb is proposal is better than current parameters
    current_log_likelihood <- proposal_likelihood
    current_sample <- proposal
    mle <- proposal_mle
    print(as.numeric(sum(current_sample)))
    print(mle)
  }
  
  print(i)
  
}


print(time - Sys.time())

given_mle <- as.numeric(mle)
given_edges <- as.numeric(sum(current_sample))
original_G_S_est <- current_sample
                          
print("given_mle")
print(given_mle)
print("given_edges")
print(given_edges)


true_times <- rds$times


lambda_seq<- seq(given_mle - 0.06, given_mle + 0.06, by = 0.03)

bias_vec <- numeric(length(lambda_seq))
mle_vec <- numeric(length(lambda_seq))

for(j in 1:length(lambda_seq)){
  
  #generate expected value of lambda MLE with set lambda^k
  static_lambda <- lambda_seq[j]
  
  q <- 3
  mle_bias_vec <- numeric(q)
  

  for(k in 1:q){
    
    #generate most likely subgraph with set lambda^k
    rds$times <- true_times
  
    current_sample <- original_G_S_est
    current_log_likelihood <- log_likelihood(rds, current_sample, static_lambda, seeds)
    
    time <- Sys.time()
    for(i in 1:iterations_small){
      
      #evaluate new proposal with static mle
      proposal <- new_proposal_flip(rds, current_sample)
      proposal <- proposal$new_sample
      proposal_likelihood <- log_likelihood(rds, proposal, static_lambda, seeds)
      
      #determine whether new graph is better for static mle
      if(as.matrix(proposal_likelihood)> as.matrix(current_log_likelihood)){
        current_log_likelihood <- proposal_likelihood
        current_sample <- proposal
      } 
    }
    
    print(Sys.time() - time)
    
    given_G_S <- current_sample
    print(sum(given_G_S))
    
    #setup bias sim
    s_vector <- Likelihood_components(rds, given_G_S)$s
    times <- rexp(length(s_vector)-1, as.vector(s_vector)[-1]*static_lambda)
    times[seeds] <- 0 
    rds$times <- c(0,times)
    
    #estimate bias based on this lambda, G_S pairing
    
    #initialize parameters
    current_sample <- given_G_S
    mle <- mle_lambda(rds, current_sample, seeds)
    current_log_likelihood <- log_likelihood(rds, current_sample, mle, seeds)
    
    time <- Sys.time()
    
    for(i in 1:iterations_small){
      #propose new graph - equally likely to propose subtraction and addition
      proposal <- new_proposal_flip(rds, current_sample)
      proposal <- proposal$new_sample
      
      #find lambda mle and determine likelihood at the mle
      proposal_mle <- mle_lambda(rds, proposal, seeds)
      proposal_likelihood <- log_likelihood(rds, proposal, proposal_mle, seeds)
      
      
      if(as.matrix(proposal_likelihood) > as.matrix(current_log_likelihood)){
        
        #climb is proposal is better than current parameters
        current_log_likelihood <- proposal_likelihood
        current_sample <- proposal
        mle <- proposal_mle
      } 
      
    }
    
    print(Sys.time() - time)
  
    mle_bias_vec[k] <- mle
    print("Iteration")
    print(static_lambda)
    print(mle)
    
    rds$times <- true_times
  }
  mle <- mean(mle_bias_vec)
  print("Average")
  print(static_lambda)
  print(mle)
  
  #calculate the bias
  bias <- mle - static_lambda
  bias_vec[j] <- as.numeric(bias)
  mle_vec[j] <- as.numeric(mle)
}

distances <- abs(mle_vec - given_mle)

true_lambda_est <- lambda_seq[which(min(distances) == distances)]


#final estimate of G_S based on the IIE of lambda

current_sample <- original_G_S_est
current_log_likelihood <- log_likelihood(rds, current_sample, true_lambda_est, seeds)

#hill climb to find optimal G_S
for(i in 1:iterations_large){
  
  #evaluate new proposal with static mle
  proposal <- new_proposal_flip(rds, current_sample)
  proposal <- proposal$new_sample
  proposal_likelihood <- log_likelihood(rds, proposal, true_lambda_est, seeds)
  
  #determine whether new graph is better for static mle
  if(as.matrix(proposal_likelihood)> as.matrix(current_log_likelihood)){
    current_log_likelihood <- proposal_likelihood
    current_sample <- proposal
  } 
}

true_G_S_est <- current_sample
true_edges_est <- as.numeric(sum(true_G_S_est))

print(given_mle)
print(true_lambda_est)




#accuracy metrics 

true_graph <- true_network

rec_num <- dim(true_graph)[1]


#accuracy metrics for first estimate original_G_S_est 

estimated_subgraph <- original_G_S_est
Accuracy_O <- (sum(true_graph == estimated_subgraph)- rec_num)/( rec_num^2 )
TPR_O <- sum((true_graph == estimated_subgraph) * true_graph)/(sum(estimated_subgraph))
TNR_O <- (sum((true_graph == estimated_subgraph)*(1-true_graph)) - rec_num)/
  (sum(1-estimated_subgraph) - rec_num)




#accuracy metrics for real estimate true_G_S_est

estimated_subgraph <- true_G_S_est
Accuracy_T <- (sum(true_graph == estimated_subgraph)- rec_num)/( rec_num^2 )
TPR_T <- sum((true_graph == estimated_subgraph) * true_graph)/(sum(estimated_subgraph))
TNR_T <- (sum((true_graph == estimated_subgraph)*(1-true_graph)) - rec_num)/
  (sum(1-estimated_subgraph) - rec_num)



n <- dim(rds$network_recruited)[1]

prop <- NA
epsilon <- NA

rds$times <- true_times
alpha <- 0
beta <- 0

c <- 0
#estimate population size

degrees <- rds$degrees #[rds$recruited]
true_graph <- true_network
n <- as.numeric(dim(true_graph)[1])


#the regularized mle given the true subgraph
full_true_est_N <- regularized_mle_N(alpha, beta, c, true_graph, degrees, n)


#the regularized mle given the MLE subgraph
full_Craw_est_N <- regularized_mle_N(alpha, beta, c, original_G_S_est, degrees, n)

#the regularized mle given the IIE subgraph
full_better_est_N <- regularized_mle_N(alpha, beta, c, true_G_S_est, degrees, n)

true_edges <- as.numeric(sum(true_network))

results <- c(prop, epsilon, true_lambda_est, given_mle,
             true_edges, true_edges_est, given_edges,
             Accuracy_T, Accuracy_O,
             TPR_T, TPR_O,
             TNR_T, TNR_O,
             alpha, beta,
             full_true_est_N, 
             full_Craw_est_N, 
             full_better_est_N)

saveRDS(results , paste0("Analysis_pois_", taskID ,".rds"))


  

