
taskID <- as.integer(Sys.getenv("SLURM_ARRAY_TASK_ID"))
require(Matrix)



mle_lambda <- function(rds_network, current_sample,seeds, gamma_prior = NULL){
  #calculate the mle lambda given the current network proposal
  if( is.null(gamma_prior)){
    gamma_prior = c(0,0)
  }
  n <- dim(current_sample)[1]
  components <- Likelihood_components(rds_network = rds_network, proposal = current_sample)
  mle_lambda <- (n - length(seeds) + gamma_prior[1])/(t(components$s) %*% components$w + gamma_prior[2])
  return(mle_lambda)
}

Likelihood_components <- function(rds_network, proposal){
  #Likelihood components for RDS process likelihood
  
  w <- rds_network$times
  u <- rds_network$degrees- rowSums(proposal)
  #let's start constructing s
  coupon_matrix <- rds_network$final_coupon_matrix
  AC <- proposal %*% coupon_matrix
  lt_AC <- AC * as.numeric(lower.tri(AC, diag = T))
  s <- rowSums(t(lt_AC)) + t(coupon_matrix) %*% u 
  return(list(w = w, u = u, s = s, lt_AC = lt_AC))
}


log_likelihood <- function(rds_network, current_sample, lambda, seeds){
  #log likelihood
  
  components <- Likelihood_components(rds_network, current_sample)
  
  n <- dim(current_sample)[1]
  
  log_likelihood <- -(lambda*(t(components$s) %*% components$w) - 
                        sum(log(rep(lambda, length(components$s)-length(seeds))* 
                                  components$s[-seeds]
                        )
                      )
  )
  
  return(log_likelihood)
  
}


log_likelihood_lambda_Gs <- function(rds_network, current_sample, lambda, seeds){
  #log-likelihood of the RDS process
  
  components <- Likelihood_components(rds_network, current_sample)
  
  n <- dim(current_sample)[1]
  
  log_likelihood <- -(lambda*(t(components$s) %*% components$w) - 
                        sum(log(rep(lambda, length(components$s)-length(seeds) )* 
                                  components$s[-seeds]
                        )
                      )
  )
  
  return(log_likelihood)
  
}


new_proposal_flip<- function(rds_network, current_sample){
  #New proposal for hill-climbing to find MLE
  
  #degree resources
  u <- rds_network$degrees - rowSums(current_sample) #[rds_network$recruited]
  #underlying rds network
  base <- rds_network$network_recruited
  #number of nodes
  size <- dim(current_sample)[1]
  #indicate that the loop has found a new proposal
  found <- F
  #when finding a new sample, start at old one
  new_sample <- current_sample
  #start the loop
  while(!found){
    #select random nodes
    if(runif(1) < 0.5){
      difference <- current_sample - base
      subtractable_vertices<- which(difference == 1, arr.ind = T)
      num <- dim(subtractable_vertices)[1]
      if(num == 0){
        return(list(new_sample = current_sample, 0))
      }
      random <- sample(1:num, 1)
      random_vertices <- subtractable_vertices[random,]
      random_vertices <- sort(random_vertices)
      
      if(current_sample[random_vertices[1], random_vertices[2]] == 1 & 
         base[random_vertices[1], random_vertices[2]] == 0){
        #subtract the edge
        new_sample[random_vertices[1], random_vertices[2]]<- 0
        new_sample[random_vertices[2], random_vertices[1]] <- 0
        found <- T
        direction <- "subtract"
      } else{
        print("error")
      }
    } else{
      u_ind <- as.numeric(u>0)
      add_matrix <-(1-current_sample) * (u_ind %*% t(u_ind)) * (1- diag(1,size, size))
      add_vertices<- which(add_matrix == 1, arr.ind = T)
      num <- dim(add_vertices)[1]
      if(num == 0){
        return(list(new_sample = current_sample, 0))
      }
      random <- sample(1:num, 1)
      random_vertices <- add_vertices[random,]
      random_vertices <- sort(random_vertices)
      if (sum(u[random_vertices] > 0) ==2 & current_sample[random_vertices[1], random_vertices[2]] == 0){
        #add the edge
        new_sample[random_vertices[1], random_vertices[2]] <- 1
        new_sample[random_vertices[2], random_vertices[1]] <- 1
        found <- T
        direction <- "add"
      } else{
        print("error")
      }
    }
  }
  return(list(new_sample = new_sample,direction = direction, random_vertices = random_vertices ))
  
}


regression_mle <- function(Y, proposal, cov){
  #MLE for regression component (poisson glm)
  
  X_matrix <- X_matrix_fun_aug(proposal, cov)
  reg_model <- glm(Y ~ log(X_matrix + 0.01), family = poisson(link = "log"))
  return(reg_model)
}




total_log_likelihood<- function(rds, Y, reg_model, Gs, lambda, seeds){
  #combine regress and RDS process likelihoods
  regress_like <- as.numeric(logLik(reg_model))
  craw_like <- log_likelihood_lambda_Gs(rds, Gs, lambda, seeds)
  total_log_like <- regress_like + craw_like 
  return(total_log_like)
}


X_matrix_fun_aug <- function(Gs, covs){
  #generate components for regression likelihood
  
  n <- dim(Gs)[1]
  d_o <- numeric(n)
  for ( i in 1:n ){
    d_o[i] <- sum(Gs[i, 1:(i-1)] * covs[1:(i-1)])
  }
  return(d_o)
}


regularized_like_N <- function(alpha, beta, c, N, G_s, degrees, n){
  #generate the regularized mle for N
  
  d_u <- numeric(n)
  
  for ( i in 1:n ){
    d_u[i] <- degrees[i] - sum(G_s[i, 1:(i-1)])
  }
  
  first_comp <- sum(lchoose(N-1:n,d_u)) 
  second_comp <- lbeta(sum(d_u) + alpha, 
                       n*N - choose(n+1, 2) - sum(d_u) + beta)
  third_comp <- -c*log(N)
  
  return(first_comp + second_comp + third_comp)
}

regularized_mle_N <- function(alpha, beta, c, G_s, degrees, n){
  #maximize the regularized likelihood
  
  N <- n*2
  higher_like <- regularized_like_N(alpha, beta, c, N+5, G_s, degrees, n)
  lower_like <- regularized_like_N(alpha, beta, c, N-5, G_s, degrees, n)
  higher_search <- higher_like > lower_like
  if(higher_search){
    N <- N+5
  }
  else{
    N <- N-5
  }
  go <- T
  i <- 0 
  while (go){
    if(higher_search){
      new_higher_like <- regularized_like_N(alpha, beta, c, N+5, G_s, degrees, n)
      if (new_higher_like > higher_like){
        N <- N+5
        higher_like <- new_higher_like
      }
      else{
        return(N+5)
      }
    }
    else{
      new_lower_like <- regularized_like_N(alpha, beta, c, N-10, G_s, degrees, n)
      if (new_lower_like > lower_like){
        N <- N-5
        lower_like <- new_lower_like
      }
      else{
        return(N-5)
      }
    }
  }
  
  i <- i +1 
  
  if (i > 5000){
    print("exceeded")
    return(N)
  }
}


#read in data


print("start")

rds <- readRDS("PWID_rds_data.rds")
true_network <- readRDS("App_True_Graph.rds")
true_lambda <- 0.23

#generate times based on true graph
s_vector <- Likelihood_components(rds, true_network)$s
times <- rexp(length(s_vector)-1, as.vector(s_vector)[-1]*true_lambda)
rds$times <- c(0,times)

seeds <- rds$seeds

#set regression parameters
n <- dim(rds$network_recruited)[1]

X_matrix <- X_matrix_fun_aug(true_network, rds$rec_cov)
beta <- 1
#approximate the poisson model with a glm
mu <- 0.01
Y <- rpois(n, beta*X_matrix + mu)
model <- glm(Y ~ log(X_matrix + mu), family = poisson(link = "log"))


#initialize parameters
current_sample <- rds$network_recruited
mle <- mle_lambda(rds, current_sample, seeds)
current_log_likelihood <- -Inf

time <- Sys.time()

iterations_large <- 15000
iterations_small <- 5000

for(i in 1:iterations_large){
  #propose new graph - equally likely to propose subtraction and addition
  proposal <- new_proposal_flip(rds, current_sample)
  proposal <- proposal$new_sample
  
  #find lambda mle and determine likelihood at the mle
  proposal_lambda_mle <- mle_lambda(rds, proposal, seeds)
  proposal_regress <- regression_mle(Y, proposal, rds$rec_cov)
  proposal_likelihood <- total_log_likelihood(rds, Y, proposal_regress, proposal, proposal_lambda_mle, seeds)
  
  
  if(as.matrix(proposal_likelihood)> as.matrix(current_log_likelihood)){
    #climb is proposal is better than current parameters
    current_log_likelihood <- proposal_likelihood
    current_sample <- proposal
    mle <- proposal_lambda_mle
    current_regress <- proposal_regress
  }
  
}

print(Sys.time() - time)

given_mle <- as.numeric(mle)
given_edges <- as.numeric(sum(current_sample))
original_G_S_est <- current_sample
given_s <- as.vector(Likelihood_components(rds, original_G_S_est)$s)

print("truth")
print(given_edges)
print(true_lambda)

print("given_mle")
print(given_mle)
print("given_edges")
print(given_edges)


true_times <- rds$times


lambda_seq<- seq(given_mle - 0.06 , given_mle + 0.06, by = 0.03)

bias_vec <- numeric(length(lambda_seq))
mle_vec <- numeric(length(lambda_seq))

for(j in 1:length(lambda_seq)){
  
  #generate expected value of lambda MLE with set lambda^k
  static_lambda <- lambda_seq[j]
  
  q <- 3
  mle_bias_vec <- numeric(q)
  

  for(k in 1:q){
    
    #generate most likely subgraph with set lambda^k
    rds$times <- true_times
    
    #initialize parameters
    current_sample <- original_G_S_est
    current_log_likelihood <- -Inf
    
    for(i in 1:iterations_small){
      #propose new graph - equally likely to propose subtraction and addition
      proposal <- new_proposal_flip(rds, current_sample)
      proposal <- proposal$new_sample
      
      #find lambda mle and determine likelihood at the mle
      proposal_regress <- regression_mle(Y, proposal, rds$rec_cov)
      proposal_likelihood <- total_log_likelihood(rds, Y, proposal_regress, proposal, static_lambda, seeds)
      
      
      if(as.matrix(proposal_likelihood)> as.matrix(current_log_likelihood)){
        
        #climb is proposal is better than current parameters
        current_log_likelihood <- proposal_likelihood
        current_sample <- proposal
      }
    }
    
    given_G_S <- current_sample
    print(sum(given_G_S))
    
    #setup bias sim
    s_vector <- Likelihood_components(rds, given_G_S)$s
    times <- rexp(length(s_vector)-1, as.vector(s_vector)[-1]*static_lambda)
    rds$times <- c(0,times)
    
    #estimate bias based on this lambda, hill-climbed G_S pairing
    
    
    #initialize parameters
    current_sample <- given_G_S
    mle <- mle_lambda(rds, given_G_S, seeds)
    current_log_likelihood <- -Inf
    
    for(i in 1:iterations_small){
      #propose new graph - equally likely to propose subtraction and addition
      proposal <- new_proposal_flip(rds, current_sample)
      proposal <- proposal$new_sample
      
      #find lambda mle and determine likelihood at the mle
      proposal_lambda_mle <- mle_lambda(rds, proposal, seeds)
      proposal_regress <- regression_mle(Y, proposal, rds$rec_cov)
      proposal_likelihood <- total_log_likelihood(rds, Y, proposal_regress, proposal, proposal_lambda_mle, seeds)
      
      
      if(as.matrix(proposal_likelihood)> as.matrix(current_log_likelihood)){
        
        #climb if proposal is better than current parameters
        current_log_likelihood <- proposal_likelihood
        current_sample <- proposal
        mle <- proposal_lambda_mle
        current_regress <- proposal_regress
      }
    }
    
    
    mle_bias_vec[k] <- mle
    print(mle)
    
    rds$times <- true_times
  }
  mle <- mean(mle_bias_vec)
  print(static_lambda)
  print(mle)
  #calculate the bias
  bias <- mle - static_lambda
  bias_vec[j] <- as.numeric(bias)
  mle_vec[j] <- as.numeric(mle)
}

distances <- abs(mle_vec - given_mle)

true_lambda_est <- lambda_seq[which(min(distances) == distances)]


#final estimate of G_S based on the IIE of lambda

#initialize parameters
current_sample <- rds$network_recruited
mle <- mle_lambda(rds, current_sample, seeds)
current_log_likelihood <- -Inf

for(i in 1:iterations_large){
  #propose new graph - equally likely to propose subtraction and addition
  proposal <- new_proposal_flip(rds, current_sample)
  proposal <- proposal$new_sample
  
  #find lambda mle and determine likelihood at the mle
  proposal_regress <- regression_mle(Y, proposal, rds$rec_cov)
  proposal_likelihood <- total_log_likelihood(rds, Y, proposal_regress, proposal, true_lambda_est, seeds)
  #print(proposal_regress$beta)
  
  if(as.matrix(proposal_likelihood)> as.matrix(current_log_likelihood)){
    
    #climb is proposal is better than current parameters
    current_log_likelihood <- proposal_likelihood
    current_sample <- proposal
    current_regress <- proposal_regress
  } 
}

true_G_S_est <- current_sample
true_edges_est <- as.numeric(sum(true_G_S_est))
regress_better <- current_regress

print(true_edges_est)


#accuracy metrics 

true_graph <- true_network

rec_num <- dim(true_graph)[1]


#accuracy metrics for first estimate original_G_S_est 

estimated_subgraph <- original_G_S_est
Accuracy_O <- (sum(true_graph == estimated_subgraph)- rec_num)/( rec_num^2 )
TPR_O <- sum((true_graph == estimated_subgraph) * true_graph)/(sum(estimated_subgraph))
TNR_O <- (sum((true_graph == estimated_subgraph)*(1-true_graph)) - rec_num)/
  (sum(1-estimated_subgraph) - rec_num)




#accuracy metrics for real estimate true_G_S_est

estimated_subgraph <- true_G_S_est
Accuracy_T <- (sum(true_graph == estimated_subgraph)- rec_num)/( rec_num^2 )
TPR_T <- sum((true_graph == estimated_subgraph) * true_graph)/(sum(estimated_subgraph))
TNR_T <- (sum((true_graph == estimated_subgraph)*(1-true_graph)) - rec_num)/
  (sum(1-estimated_subgraph) - rec_num)




#Use true G_S in network regression and estimation of population size
n <- dim(rds$network_recruited)[1]

prop <- NA
epsilon <- NA

rds$times <- true_times
alpha <- 0
beta <- 0
c <- 0
#estimate population size

degrees <- rds$degrees
true_graph <- true_network
n <- as.numeric(dim(true_graph)[1])

#the regularized mle given the true subgraph
full_true_est_N <- regularized_mle_N(alpha, beta, c, true_graph, degrees, n)


#the regularized mle given the MLE subgraph
full_Craw_est_N <- regularized_mle_N(alpha, beta, c, original_G_S_est, degrees, n)

#the regularized mle given the IIE subgraph
full_better_est_N <- regularized_mle_N(alpha, beta, c, true_G_S_est, degrees, n)
  
true_edges <- as.numeric(sum(true_network))
  
results <- c(prop, epsilon, true_lambda_est, given_mle,
                              true_edges, true_edges_est, given_edges,
                              Accuracy_T, Accuracy_O,
                              TPR_T, TPR_O,
                              TNR_T, TNR_O,
                              alpha, beta,
                              full_true_est_N, 
                              full_Craw_est_N, 
                              full_better_est_N)


saveRDS(results, paste0("RegAnalysis_pois_", taskID ,".rds")) 





