
taskID <- as.integer(Sys.getenv("SLURM_ARRAY_TASK_ID"))
#taskID <- 1

require(Matrix)


Np = c(10, 10, 10)
N = c(5000,5000,5000)

sim_grid <- expand.grid(Np, N)

#pop_params <- sim_grid[as.integer((taskID-1000)/100)+1,]
pop_params <- sim_grid[as.integer(taskID/100)+1,]

epsilon = c(0.3,0.3, 0.3)
prop = c(0.6,0.6,0.6)

sim_grid <- expand.grid(epsilon, prop)

#block_params <- sim_grid[as.integer((taskID-1000)/100)+1,]
block_params <- sim_grid[as.integer(taskID/100)+1,]


#Generate Graph
symm_binom_graph <- function(N,prob){
  connects <- rbinom(N*N, 1, prob)
  assym_graph <- Matrix(connects, N, N, sparse = T) * as.numeric(lower.tri(diag(N), diag = F))
  assym_graph_trans <- t(assym_graph) + assym_graph
  return(assym_graph_trans)
}


#symm_beta_binom_graph <- function(N,prob){
#  alpha <- 1
#  beta <- (alpha*(1-prob))/prob
#  probs <- rbeta(N*N, alpha, beta)
#  connects <- rbinom(N*N, 1, probs)
#  print(var(connects))
#  assym_graph <- Matrix(connects, N, N, sparse = T) * as.numeric(lower.tri(diag(N), diag = F))
#  assym_graph_trans <- t(assym_graph) + assym_graph
#  return(assym_graph_trans)
#} - actually the same as bernoulli prob!


stochastic_block_graph <- function(N, prob, prop){
  group_mem <- rbinom(N, 1, prop)
  #the two groups
  n_A = sum(group_mem)
  n_B = N-n_A
  #ratio between in_prob and out_prob
  in_out = 2
  
  #calculation of in_prob based on overall prevalence
  in_prob <- (in_out*prob*N*(N-1) )/(2*n_A*n_B + in_out*( n_A*(n_A-1) + n_B*(n_B -1) ))
  out_prob <- in_prob/in_out
  
  #print((2*n_A*n_B*out_prob + (n_A*(n_A-1) + n_B*(n_B -1))*in_prob)/(N*(N-1))) - check
  
  #form building block matrices using in and out probs
  in_connects <- rbinom(N*N, 1, in_prob)
  in_matrix <- Matrix(in_connects, N, N, sparse = T)
  out_connects <- rbinom(N*N, 1, out_prob)
  out_matrix <- Matrix(out_connects, N, N, sparse = T)
  
  in_group_matrix <- in_matrix * Matrix(group_mem %*% t(group_mem) +
                                          (1-group_mem) %*% t(1-group_mem), sparse = T)
  
  #print(sum(in_group_matrix)/(n_A^2 + n_B^2))
  
  bet_group_matrix <- out_connects * Matrix((1-group_mem) %*% t(group_mem) + 
                                              (group_mem) %*% t(1-group_mem), sparse = T)
  
  #print(sum(bet_group_matrix)/(2*n_A*n_B))
  
  assym_graph <- (in_group_matrix + bet_group_matrix) * as.numeric(lower.tri(diag(N), diag = F))
  sym_graph <- t(assym_graph) + assym_graph
  print("stochastic block")
  print(mean(sym_graph))
  return(sym_graph)
}


 


best_RDS_sample_craw_multiple_bare<- function(net, coupons, seeds, rate, interview_limit = NULL, covariates = NULL ){
  #Main automated RDS Sampler
  
  rec_cov <- NULL
  
  size = length(net[,1])
  
  if(is.null(interview_limit)){
    interview_limit = size
  }
  
  sample = T
  recruited <- seeds
  network_observed <- Matrix(NA, size,size)
  direction_network_observed <- Matrix(NA, size,size)
  coupon_vector <- rep(0, size)
  coupon_vector[seeds] <- coupons
  degrees <- rowSums(net) 
  wait_time_mat <- matrix(NA,size,size)
  #Insert wait times for all seed recruits
  
  #identify neighbors of seeds
  for (seed in seeds){
    
    #identify neighbors of the seed
    neighbors <- which(net[seed,] ==1)
    
    #check if potential recruits were already recruited
    potentials <-  neighbors[!(neighbors %in% recruited)]
    
    #generate wait times
    wait_times <- rexp(length(potentials), rate = rate)
    
    #add the wait times to the wait time matrix
    wait_time_mat[seed, potentials] <- wait_times
    
    
    #set the wait times going to recruited to NA
    wait_time_mat[,seed] <- NA
    
  }
  
  
  
  #store coupon matrix and times
  times <- rep(0,length(seeds))
  coupon_matrix <- Matrix(0,size, size)
  k <- length(seeds) + 1
  
  
  while (sample){
    
    
    #Fill in coupon matrix
    coupon_matrix[,k] <- as.numeric(coupon_vector > 0)
    
    
    #increment time
    k <- k+1
    
    # now let's pick out the next observed edge
    r_c <- which(min(wait_time_mat, na.rm = T) == wait_time_mat, arr.ind = T)
    time <- min(wait_time_mat, na.rm = T)
    
    #add time
    times <- c(times, time)
    
    #fill in RDS directed network
    direction_network_observed[r_c[1], r_c[2]] <- 1
    
    #fill in the observed edge in symmetric network
    network_observed[r_c[1], r_c[2]] <- 1
    network_observed[r_c[2], r_c[1]] <- 1
    
    #add recruited to recruited list
    recruited <- c(recruited, r_c[2])
    newly_recruited <- r_c[2]
    
    #Give coupons to recruited 
    coupon_vector[r_c[2]] <- coupons
    
    #subtract a coupon from the recruiter
    coupon_vector[r_c[1]] <-  coupon_vector[r_c[1]] - 1
    
    #set wait times associated with a node with no coupons to NA
    if(length(r_c) > 0){
      if(coupon_vector[r_c[1]] == 0){
        wait_time_mat[r_c[1],] <- NA
      }
    }
    
    #set the wait times going to recruited to NA
    wait_time_mat[,r_c[2]] <- NA
    
    #increment time
    wait_time_mat[!is.na(wait_time_mat)]<- wait_time_mat[!is.na(wait_time_mat)] - time
    #print(wait_time_mat)
    
    #generate wait times for potential edges of newly recruited node
    
    #identify neighbors of newly_recruited
    neighbors <- which(net[newly_recruited,] ==1)
    
    #check if potential recruits were already recruited
    potentials <-  neighbors[!(neighbors %in% recruited)]
    
    #generate wait times
    wait_times <- rexp(length(potentials), rate = rate)
    
    #add the wait times to the wait time matrix
    wait_time_mat[newly_recruited, potentials] <- wait_times
    
    #print(wait_time_mat)
    
    #print(recruited)
    #print(coupon_vector)
    
    sample = !( all(coupon_vector==0) |
                  all(is.na(wait_time_mat)) | 
                  length(recruited) >= interview_limit )
    
  }
  
  
  #arrange the coupon matrix and cut off time at last recruitment event
  final_coupon_matrix <- coupon_matrix[recruited,1:length(recruited)]
  
  #let's rearrange the observed network so it lines up with the recruitment order
  network_recruited <- network_observed
  network_recruited[which(is.na(network_observed))] <- 0 
  network_recruited <- network_recruited[recruited,recruited]
  
  #true network
  true_network <- net[recruited, recruited]
  
  #covariates of recruited participants
  if(!is.null(covariates)){
    rec_cov <- covariates[recruited]
  }
  
  return(list(rec_cov = rec_cov, true_network = true_network, 
              network = network_observed,
              network_recruited = network_recruited, 
              recruited = recruited, coupon_vector = coupon_vector, times = times,
              #wait_time_mat = wait_time_mat, 
              #direction_network_observed = direction_network_observed, 
              degrees = degrees, coupon_matrix = coupon_matrix,
              #final_coupon_matrix = final_coupon_matrix,
              seeds = seeds,k=k ))
}


mle_lambda <- function(rds_network, current_sample,seeds, gamma_prior = NULL){
  #calculate the mle lambda given the current network proposal
  if( is.null(gamma_prior)){
    gamma_prior = c(0,0)
  }
  n <- dim(current_sample)[1]
  components <- Likelihood_components(rds_network = rds_network, proposal = current_sample)
  mle_lambda <- (n - length(seeds) + gamma_prior[1])/(t(components$s) %*% components$w + gamma_prior[2])
  #print(n-seeds)
  return(mle_lambda)
}

Likelihood_components <- function(rds_network, proposal){
  #Likelihood components for Crawford Likelihood
  
  w <- rds_network$times
  u <- rds_network$degrees[rds_network$recruited] - rowSums(proposal)
  #let's start constructing s
  coupon_matrix <- rds_network$final_coupon_matrix
  AC <- proposal %*% coupon_matrix
  lt_AC <- AC * as.numeric(lower.tri(AC, diag = T))
  s <- rowSums(t(lt_AC)) + t(coupon_matrix) %*% u 
  return(list(w = w, u = u, s = s, lt_AC = lt_AC))
}


log_likelihood <- function(rds_network, current_sample, lambda, seeds){
  #MAP Finder Log Likelihood
  
  components <- Likelihood_components(rds_network, current_sample)
  
  n <- dim(current_sample)[1]
  
  #this looks ugly but it's the product of changing it around a bunch of times just to make it as explicit as possible
  log_likelihood <- -(lambda*(t(components$s) %*% components$w) - 
                        sum(log(rep(lambda, length(components$s)- length(seeds))* 
                                  components$s[-(1:length(seeds))]
                        )
                      )
  )
  
  #print(components$w*components$s)
  
  return(log_likelihood)
  
}


log_likelihood_lambda_Gs <- function(rds_network, current_sample, lambda, seeds){
  #MAP Finder Log Likelihood
  
  components <- Likelihood_components(rds_network, current_sample)
  
  n <- dim(current_sample)[1]
  
  #this looks ugly but it's the product of changing it around a bunch of times just to make it as explicit as possible
  log_likelihood <- -(lambda*(t(components$s) %*% components$w) - 
                        sum(log(rep(lambda, length(components$s)- length(seeds) )* 
                                  components$s[-(1:length(seeds))]
                        )
                        )
  )
  
  #print(components$w*components$s)
  
  return(log_likelihood)
  
}


new_proposal_flip<- function(rds_network, current_sample){
  #New proposal network for mcmc
  
  #degree resources
  u <- rds_network$degrees[rds_network$recruited] - rowSums(current_sample)
  #underlying rds network
  base <- rds_network$network_recruited
  #number of nodes
  size <- dim(current_sample)[1]
  #indicate that the loop has found a new proposal
  found <- F
  #when finding a new sample, start at old one
  new_sample <- current_sample
  #start the loop
  while(!found){
    #select random nodes
    if(runif(1) < 0.5){
      difference <- current_sample - base
      subtractable_vertices<- which(difference == 1, arr.ind = T)
      num <- dim(subtractable_vertices)[1]
      if(num == 0){
        return(list(new_sample = current_sample, 0))
      }
      #print(num)
      random <- sample(1:num, 1)
      random_vertices <- subtractable_vertices[random,]
      random_vertices <- sort(random_vertices)
      #print(subtractable_vertices)
      if(current_sample[random_vertices[1], random_vertices[2]] == 1 & 
         base[random_vertices[1], random_vertices[2]] == 0){
        #subtract the edge
        new_sample[random_vertices[1], random_vertices[2]]<- 0
        new_sample[random_vertices[2], random_vertices[1]] <- 0
        found <- T
        direction <- "subtract"
      } else{
        print("error")
      }
    } else{
      u_ind <- as.numeric(u>0)
      add_matrix <-(1-current_sample) * (u_ind %*% t(u_ind)) * (1- diag(1,size, size))
      add_vertices<- which(add_matrix == 1, arr.ind = T)
      num <- dim(add_vertices)[1]
      if(num == 0){
        return(list(new_sample = current_sample, 0))
      }
      random <- sample(1:num, 1)
      random_vertices <- add_vertices[random,]
      random_vertices <- sort(random_vertices)
      #print(add_vertices)
      if (sum(u[random_vertices] > 0) ==2 & current_sample[random_vertices[1], random_vertices[2]] == 0){
        #add the edge
        new_sample[random_vertices[1], random_vertices[2]] <- 1
        new_sample[random_vertices[2], random_vertices[1]] <- 1
        found <- T
        direction <- "add"
      } else{
        print("error")
      }
    }
    #print(random_vertices)
    #print(num)
    #if(random_vertices[1] < 5){
    #  return(list(new_sample = current_sample, 0))
    #}
    
    
    
    #if the edge between the two random nodes is present, but is absent from baseline RDS
    
    #print(found)
  }
  #print(direction)
  return(list(new_sample = new_sample,direction = direction, random_vertices = random_vertices ))
  
}



new_proposal<- function(rds_network, current_sample){
  #New proposal network for mcmc
  
  #degree resources
  u <- rds_network$degrees[rds_network$recruited] - rowSums(current_sample)
  #underlying rds network
  base <- rds_network$network_recruited
  #number of nodes
  size <- dim(current_sample)[1]
  #indicate that the loop has found a new proposal
  found <- F
  #when finding a new sample, start at old one
  new_sample <- current_sample
  #start the loop
  while(!found){
    #select random nodes
    random_vertices<- sample(x = 1:size, size = 2, replace = FALSE)
    random_vertices <- sort(random_vertices)
    #print(random_vertices)
    #if the degree of the two vertices is sufficient and the edge does not exist, then:
    if (sum(u[random_vertices] > 0) ==2 & current_sample[random_vertices[1], random_vertices[2]] == 0){
      #add the edge
      new_sample[random_vertices[1], random_vertices[2]] <- 1
      new_sample[random_vertices[2], random_vertices[1]] <- 1
      found <- T
      direction <- "add"
    }
    #if the edge between the two random nodes is present, but is absent from baseline RDS
    if(current_sample[random_vertices[1], random_vertices[2]] == 1 & 
       base[random_vertices[1], random_vertices[2]] == 0){
      #subtract the edge
      new_sample[random_vertices[1], random_vertices[2]]<- 0
      new_sample[random_vertices[2], random_vertices[1]] <- 0
      found <- T
      direction <- "subtract"
    }
  }
  #print(direction)
  return(list(new_sample = new_sample,direction = direction, random_vertices = random_vertices ))
  
}

beta_mle <- function(Y, X_matrix){
  beta <- solve(t(X_matrix) %*% X_matrix) %*% t(X_matrix) %*% Y
  return(beta)
}

sigma_mle <- function(Y, beta, X_matrix){
  sigma <- mean((Y-X_matrix %*% beta)^2)
  return(sigma)
}


log_likelihood_Beta_sigma <- function(Y, X_matrix, beta, sigma){
  predictions <- X_matrix %*% solve(t(X_matrix) %*% X_matrix) %*% t(X_matrix) %*% Y 
  SSE <- sum(  (  (Y - predictions)/sigma  )^2  )
  log_normal_like <- -(n/2)*log(2*pi)- n*log(sigma)- 0.5*SSE 
  return(log_normal_like)
}

total_log_likelihood<- function(rds, Y, regress_vec, Gs,lambda, seeds){
  regress_like <- log_likelihood_Beta_sigma(Y, regress_vec$X_matrix, regress_vec$beta, regress_vec$sigma)
  craw_like <- log_likelihood_lambda_Gs(rds, Gs, lambda, seeds)
  total_log_like <- regress_like + craw_like 
  return(total_log_like)
}

regression_mle <- function(Y, Gs){
  
  X_matrix <- X_matrix_fun(Gs)
  beta <- beta_mle(Y, X_matrix)
  sigma <- sigma_mle(Y, beta, X_matrix)
  
  return(list(X_matrix = X_matrix, beta = beta, sigma = sigma))
}

X_matrix_fun <- function(Gs){
  n <- dim(Gs)[1]
  d_o <- numeric(n)
  for ( i in 1:n ){
    d_o[i] <- sum(Gs[i, 1:(i-1)])
  }
  
  X_matrix <- cbind(1, d_o)
  return(X_matrix)
}

estimate_N <- function(G_s, p_bar, n, degrees){
  
  d_u <- numeric(n)
  
  for ( i in 1:n ){
    d_u[i] <- degrees[i] - sum(G_s[i, 1:(i-1)])
  }
  
  est_N <- (n+1)/2 + (1/(p_bar*n))*sum(d_u)
  
  return(est_N)
  
}

log_posterior_N <- function(alpha, beta, c, N, G_s, degrees, n){
  d_u <- numeric(n)
  
  for ( i in 1:n ){
    d_u[i] <- degrees[i] - sum(G_s[i, 1:(i-1)])
  }
  
  first_comp <- sum(lchoose(N-1:n,d_u)) 
  second_comp <- lbeta(sum(d_u) + alpha, 
                       n*N - choose(n+1, 2) - sum(d_u) + beta)
  third_comp <- -c*log(N)
  
  return(first_comp + second_comp + third_comp)
}


max_posterior_N <- function(alpha, beta, c, G_s, degrees, n, true_N){
  N <- n*10
  higher_like <- log_posterior_N(alpha, beta, c, N+5, G_s, degrees, n)
  lower_like <- log_posterior_N(alpha, beta, c, N-5, G_s, degrees, n)
  higher_search <- higher_like > lower_like
  if(higher_search){
    N <- N+5
  }
  else{
    N <- N-5
  }
  go <- T
  i <- 0 
  while (go){
    if(higher_search){
      new_higher_like <- log_posterior_N(alpha, beta, c, N+5, G_s, degrees, n)
      if (new_higher_like > higher_like){
        #print(1)
        N <- N+5
        higher_like <- new_higher_like
      }
      else{
        #print(2)
        return(N+5)
      }
    }
    else{
      new_lower_like <- log_posterior_N(alpha, beta, c, N-5, G_s, degrees, n)
      if (new_lower_like > lower_like){
        N <- N-5
        lower_like <- new_lower_like
        #print(3)
      }
      else{
        #print(4)
        return(N-5)
      }
    }
    
    i <- i +1 
    
    if (i > 17000){
      print("exceeded")
      return(N)
    }
  }
}



#get the degrees to be right

sbm_log_likelihood <- function(parameters,
                               degrees,
                               G_s,
                               group_mem,
                               alpha_in, beta_in,
                               alpha_out, beta_out){
  
  n <- dim(as.matrix(G_s))[1]
  
  #print(parameters)
  
  p_in <- parameters[1]
  p_ratio <- parameters[2]
  N_A <- round(parameters[3])
  N_ratio <- parameters[4]
  
  d_u <- numeric(n)
  
  for ( i in 1:n ){
    d_u[i] <- degrees[i] - sum(G_s[i, 1:(i-1)])
  }
  
  #misnamed as d_A
  d_A <- numeric(n)
  for ( i in 1:n ){
    d_A[i] <- sum(group_mem[1:i])
  }
  
  #d_B <- numeric(n)
  #G_s_B <- apply(G_s, 1, function(x){x*(1-group_mem)})
  #for ( i in 1:n){
  #  d_B[i] <- sum(G_s_B[i, 1:(i-1)])
  #}
  
  log_like <- 0
  
  #change this maybe
  N <- round(N_A + N_A*N_ratio + sum(1-group_mem) + 1)
  
  p_out <- p_ratio*p_in
  
  for (i in 1:n){
    
    N_i <- group_mem[i]*(N_A - d_A[i]) + (1-group_mem[i])*(N -i - (N_A - d_A[i]))
    
    like_i <- 0
    
    #for (j in max(0, d_u[i] - ((N - i) - N_i)):min(d_u[i], N_i)){
    for (j in 0:d_u[i]) {
      
      like_i_j <-  dbinom(j, N_i, p_in, log= F)
      
      like_i_j <- like_i_j * dbinom(d_u[i]-j, (N - i) - N_i, p_out, log= F)
      
      like_i <- like_i + like_i_j
      
    }
    
    log_like <- log_like + log(like_i)
    
  }
  
  #print(log_like)
  
  log_prior <- dbeta(p_in, alpha_in, beta_in, log = T)
  log_prior <- log_prior + dbeta(p_out, alpha_out, beta_out, log = T)
  
  #print(log_prior)
  
  #print(parameters)
  
  
  return(-(log_like + log_prior)) #minimize negative log-likelihood
  
}

optim_sbm_likelihood <- function(start_parameters,
                                 degrees,
                                 G_s, 
                                 group_mem,
                                 alpha_in, beta_in,
                                 alpha_out, beta_out){
  
  values <- optim(start_parameters, function(x){sbm_log_likelihood(x,
                                                                   degrees,
                                                                   G_s,
                                                                   group_mem,
                                                                   alpha_in, beta_in,
                                                                   alpha_out, beta_out)},
                  method = "L-BFGS-B", 
                  lower = c(0.00001,0.00001,100, 0.00001), upper = c(1, 1, Inf, 1),
                  control = list(parscale = start_parameters)
  )
  
  return(values$par)
  
}





block_model <- T
#READ IN DATA AND SIMULATE TIMES

rds_results <- readRDS(paste0('rds_', taskID, '.rds'))
#rds_results <- readRDS("~/Research/Crawford Paper Finals/Current Cluster Files/rds_1.rds")
rds <- rds_results$rds

if(block_model){
  N <- 5000
  prob <- rds_results$in_prob
  epsilon <- as.double(block_params[1])
  prop <- as.double(block_params[2])
  in_prob <- rds_results$in_prob
  out_prob <- rds_results$out_prob
  group_mem <- rds_results$group_mem
  group_mem_total <- rds_results$group_mem_total
}else{
  epsilon <- NA
  prop <- NA
  N<- as.double(pop_params[2])
  prob <- as.double(pop_params[1]/pop_params[2])
}


seeds <- rds$seeds
components <- Likelihood_components(rds, rds$true_network)
s_vector <- components$s
print("start")
times <- rexp(length(s_vector)-length(seeds), as.vector(s_vector)[-(1:length(seeds))])
#print(times)
rds$times <- c(rep(0, length(seeds)),times)
print(sum(rds$true_network))

#MLE OF TRUE GRAPH
best_mle_know <-as.numeric(mle_lambda(rds, rds$true_network, seeds))
#print( mle_lambda(rds, rds$true_network, 1))
#print(var(as.vector(Likelihood_components(rds, rds$true_network)$s*c(0,times))))

best_likelihood_know <- log_likelihood(rds, rds$true_network, best_mle_know, seeds)

print(best_mle_know)


#generate regression data
true_graph <- rds$true_network

n <- dim(true_graph)[1]

true_beta <- c(0,1)

sigma <- 1

X_matrix <- X_matrix_fun(true_graph)

Y <- X_matrix %*% true_beta + rnorm(n, 0, sigma)


small_sim <- 5000
big_sim <- 7000




#initialize parameters
current_sample <- rds$network_recruited
mle <- mle_lambda(rds, rds$network_recruited, seeds)
current_log_likelihood <- -Inf

for(i in 1:small_sim){
  #propose new graph - equally likely to propose subtraction and addition
  proposal <- new_proposal_flip(rds, current_sample)
  proposal <- proposal$new_sample
  
  #find lambda mle and determine likelihood at the mle
  proposal_lambda_mle <- mle_lambda(rds, proposal, seeds)
  proposal_regress <- regression_mle(Y, proposal)
  proposal_likelihood <- total_log_likelihood(rds, Y, proposal_regress, proposal, proposal_lambda_mle, seeds)
  
  
  if(as.matrix(proposal_likelihood)> as.matrix(current_log_likelihood)){
    
    #climb is proposal is better than current parameters
    current_log_likelihood <- proposal_likelihood
    current_sample <- proposal
    mle <- proposal_lambda_mle
    current_regress <- proposal_regress
  }
}

given_mle <- as.numeric(mle)
given_edges <- as.numeric(sum(current_sample))
original_G_S_est <- current_sample
given_s <- as.vector(Likelihood_components(rds, original_G_S_est)$s)

print("given_mle")
print(given_mle)
print("given_edges")
print(given_edges)


true_times <- rds$times


lambda_seq<- seq(given_mle - 0.4 , given_mle + 0.4, by = 0.1)
#lambda_seq <- c(1)
#get rid
bias_vec <- numeric(length(lambda_seq))
mle_vec <- numeric(length(lambda_seq))

for(j in 1:length(lambda_seq)){
  
  #pretend that the mles are the truth
  static_lambda <- lambda_seq[j]
  
  #measure bias
  q <- 20
  #get rid
  mle_bias_vec <- numeric(q)
  

  for(k in 1:q){
    #set back to truth one more time for good measure
    rds$times <- true_times
    
    #estimate G_S
    
    #hill climb to find optimal G_S
    
    #initialize parameters
    current_sample <- rds$network_recruited
    current_log_likelihood <- -Inf
    
    for(i in 1:small_sim){
      #propose new graph - equally likely to propose subtraction and addition
      proposal <- new_proposal_flip(rds, current_sample)
      proposal <- proposal$new_sample
      
      #find lambda mle and determine likelihood at the mle
      proposal_regress <- regression_mle(Y, proposal)
      proposal_likelihood <- total_log_likelihood(rds, Y, proposal_regress, proposal, static_lambda, seeds)
      
      
      if(as.matrix(proposal_likelihood)> as.matrix(current_log_likelihood)){
        
        #climb is proposal is better than current parameters
        current_log_likelihood <- proposal_likelihood
        current_sample <- proposal
      }
    }
    
    given_G_S <- current_sample
    print(sum(given_G_S))
    
    #setup bias sim
    s_vector <- Likelihood_components(rds, given_G_S)$s
    times <- rexp(length(s_vector)-length(seeds), as.vector(s_vector)[-(1:length(seeds))])
    #print(times)
    rds$times <- c(rep(0, length(seeds)),times)
    
    #estimate bias based on this lambda, hill-climbed G_S pairing
    
    
    #initialize parameters
    current_sample <- rds$network_recruited
    mle <- mle_lambda(rds, rds$network_recruited, seeds)
    current_log_likelihood <- -Inf
    
    for(i in 1:small_sim){
      #propose new graph - equally likely to propose subtraction and addition
      proposal <- new_proposal_flip(rds, current_sample)
      proposal <- proposal$new_sample
      
      #find lambda mle and determine likelihood at the mle
      proposal_lambda_mle <- mle_lambda(rds, proposal, seeds)
      proposal_regress <- regression_mle(Y, proposal)
      proposal_likelihood <- total_log_likelihood(rds, Y, proposal_regress, proposal, proposal_lambda_mle, seeds)
      
      
      if(as.matrix(proposal_likelihood)> as.matrix(current_log_likelihood)){
        
        #climb if proposal is better than current parameters
        current_log_likelihood <- proposal_likelihood
        current_sample <- proposal
        mle <- proposal_lambda_mle
        current_regress <- proposal_regress
      }
    }
    
    
    mle_bias_vec[k] <- mle
    print(mle)
    
    #set rds times back to the truth
    
    rds$times <- true_times
  }
  mle <- mean(mle_bias_vec)
  print(static_lambda)
  print(mle)
  #calculate the bias
  bias <- mle - static_lambda
  bias_vec[j] <- as.numeric(bias)
  mle_vec[j] <- as.numeric(mle)
  #plot(lambda_seq[1:j], bias_vec[1:j])
}

distances <- abs(mle_vec - given_mle)

true_lambda_est <- lambda_seq[which(min(distances) == distances)]


#let's use this unbiased estimate of lambda to generate an unbiased estimate of G_S



#initialize parameters
current_sample <- rds$network_recruited
mle <- mle_lambda(rds, rds$network_recruited, seeds)
current_log_likelihood <- -Inf

for(i in 1:small_sim){
  #propose new graph - equally likely to propose subtraction and addition
  proposal <- new_proposal_flip(rds, current_sample)
  proposal <- proposal$new_sample
  
  #find lambda mle and determine likelihood at the mle
  proposal_regress <- regression_mle(Y, proposal)
  proposal_likelihood <- total_log_likelihood(rds, Y, proposal_regress, proposal,
                                              true_lambda_est, seeds)
  #print(proposal_regress$beta)
  
  if(as.matrix(proposal_likelihood)> as.matrix(current_log_likelihood)){
    
    #climb is proposal is better than current parameters
    current_log_likelihood <- proposal_likelihood
    current_sample <- proposal
    current_regress <- proposal_regress
  } 
}

true_G_S_est <- current_sample
true_edges_est <- as.numeric(sum(true_G_S_est))
regress_better <- current_regress

print(true_edges_est)


#accuracy metrics 

true_graph <- rds$true_network

rec_num <- dim(true_graph)[1]


#accuracy metrics for first estimate original_G_S_est 

estimated_subgraph <- original_G_S_est
Accuracy_O <- (sum(true_graph == estimated_subgraph)- rec_num)/( rec_num^2 )
TPR_O <- sum((true_graph == estimated_subgraph) * true_graph)/(sum(estimated_subgraph))
TNR_O <- (sum((true_graph == estimated_subgraph)*(1-true_graph)) - rec_num)/
  (sum(1-estimated_subgraph) - rec_num)




#accuracy metrics for real estimate true_G_S_est

estimated_subgraph <- true_G_S_est
Accuracy_T <- (sum(true_graph == estimated_subgraph)- rec_num)/( rec_num^2 )
TPR_T <- sum((true_graph == estimated_subgraph) * true_graph)/(sum(estimated_subgraph))
TNR_T <- (sum((true_graph == estimated_subgraph)*(1-true_graph)) - rec_num)/
  (sum(1-estimated_subgraph) - rec_num)




#Use true G_S in network regression and estimation of population size
p <- prob
rds$times <- true_times


#regular priors
alpha_vec <- c(5,10,100)
beta_vec <- alpha_vec*(1-prob)/prob
alpha_vec <-  alpha_vec
beta_vec <- beta_vec

#p_in priors
alpha_in_vec <- c(5,10,100)
beta_in_vec <- alpha_in_vec*(1-in_prob)/in_prob
alpha_in_vec <-  alpha_in_vec
beta_in_vec <- beta_in_vec


#p_out priors
alpha_out_vec <- c(5,10,100)
beta_out_vec <- alpha_out_vec*(1-out_prob)/out_prob
alpha_out_vec <-  alpha_out_vec
beta_out_vec <- beta_out_vec

#start_values
start_parameters <- numeric(4)
start_parameters[1] <- in_prob
start_parameters[2] <- out_prob/start_parameters[1]
start_parameters[3] <- sum(group_mem_total)
start_parameters[4] <- sum(1-group_mem_total)/start_parameters[3]

print("N_ratio")
print(start_parameters[4])



c <- 0
#estimate population size
print(given_edges)
degrees <- rds$degrees[rds$recruited]
true_graph <- rds$true_network
n <- as.numeric(dim(true_graph)[1])

results <-  matrix(nrow = 0, ncol = 37)

for (i in 1:length(alpha_vec)){
  
  #establish prior
  alpha <- alpha_vec[i]
  beta <- beta_vec[i]
  
  alpha_in <- alpha_in_vec[i]
  beta_in <- beta_in_vec[i]
  
  alpha_out <- alpha_out_vec[i]
  beta_out <- beta_out_vec[i]
  
  print(1)
  prob_T = prob
  true_est_N <- estimate_N(true_graph,prob_T, n, degrees)
  full_true_est_N <- max_posterior_N(alpha, beta, c, true_graph, degrees, n, true_N = N)
  
  print(2)
  prob_Craw = prob
  Craw_est_N <- estimate_N(original_G_S_est, prob_Craw, n, degrees)
  full_Craw_est_N <- max_posterior_N(alpha, beta, c, original_G_S_est, degrees, n, true_N = N)
  
  
  print(3)
  prob_better = prob
  better_est_N <- estimate_N(true_G_S_est, prob_better, n, degrees)
  full_better_est_N <- max_posterior_N(alpha, beta, c, true_G_S_est, degrees, n, true_N = N)
  
  true_edges <- as.numeric(sum(rds$true_network))
  
  
  
  print(4)
  full_true_est_N_sb_opt <- optim_sbm_likelihood(start_parameters,
                                                 degrees,
                                                 true_graph,
                                                 group_mem,
                                                 alpha_in, beta_in,
                                                 alpha_out, beta_out)
  
  full_true_est_N_sb <- round(full_true_est_N_sb_opt[3] +
                                full_true_est_N_sb_opt[3]*full_true_est_N_sb_opt[4])
  print('probs')
  print(in_prob)
  print(full_true_est_N_sb_opt[1])
  print(out_prob)
  print(full_true_est_N_sb_opt[1]*full_true_est_N_sb_opt[2])
  print("true N")
  print(full_true_est_N_sb)
  
  print(5)
  full_Craw_est_N_sb_opt <- optim_sbm_likelihood(start_parameters,
                                                 degrees,
                                                 original_G_S_est,
                                                 group_mem,
                                                 alpha_in, beta_in,
                                                 alpha_out, beta_out)
  
  full_Craw_est_N_sb <- round(full_Craw_est_N_sb_opt[3] +
                                full_Craw_est_N_sb_opt[3]*full_Craw_est_N_sb_opt[4])
  print("N")
  print(full_Craw_est_N_sb)
  
  print(6)
  full_better_est_N_sb_opt <- optim_sbm_likelihood(start_parameters,
                                                   degrees,
                                                   true_G_S_est,
                                                   group_mem,
                                                   alpha_in, beta_in,
                                                   alpha_out, beta_out)
  
  full_better_est_N_sb <- round(full_better_est_N_sb_opt[3] +
                                  full_better_est_N_sb_opt[3]*full_better_est_N_sb_opt[4])
  print("N")
  print(full_better_est_N_sb)
  
  
  print(i)
  
  
  results <- rbind(results, c(prop, epsilon, N, prob*N, best_mle_know, true_lambda_est, given_mle,
                              prob_T,
                              true_edges, true_edges_est, given_edges,
                              Accuracy_T, Accuracy_O,
                              TPR_T, TPR_O,
                              TNR_T, TNR_O,
                              true_est_N, (true_est_N - N)^2,
                              Craw_est_N, (Craw_est_N - N)^2, 
                              better_est_N, (better_est_N - N)^2,
                              alpha, beta,
                              full_true_est_N, (full_true_est_N - N)^2,
                              full_Craw_est_N, (full_Craw_est_N - N)^2, 
                              full_better_est_N, (full_better_est_N - N)^2,
                              full_true_est_N_sb, (full_true_est_N_sb - N)^2,
                              full_Craw_est_N_sb, (full_Craw_est_N_sb - N)^2, 
                              full_better_est_N_sb, (full_better_est_N_sb - N)^2
  ))
  
  print(results)
  
  results_frame <- data.frame(results)
  
  
  colnames(results_frame) <- c("sb_prop", "epsilon", "N", "Np", "best MLE Gs Known", "Alt Lambda Est",
                               "Crawford Lambda Est",
                               "True Prob",
                               "Es Size", "Alt Es Size", "Crawford Es Size",
                               "Alt Accuracy", "Crawford Accuracy",
                               "Alt TPR", "Crawford TPR",
                               "Alt TNR", "Crawford TNR",
                               "Perf N Est", "Perf N MSE",
                               "Crawford N est", "Crawford N MSE",
                               "Alt N Est", "Alt N MSE",
                               "alpha", "beta",
                               "Full Perf N Est", "Full Perf N MSE",
                               "Full Crawford N est", "Full Crawford N MSE",
                               "Full Alt N Est", "Full Alt N MSE",
                               "Full Perf N Est SB", "Full Perf N MSE SB",
                               "Full Crawford N Est SB", "Full Crawford N MSE SB",
                               "Full Alt N Est SB", "Full Alt N MSE SB")
  
  #Use true G_S in network regression and estimation of population size
  
  
  
  saveRDS(results, paste0('RegbiasLambda_', taskID, '.rds')) 
  
  
}


results_frame <- data.frame(results)


colnames(results_frame) <- c("sb_prop", "epsilon", "N", "Np", "best MLE Gs Known", "Alt Lambda Est",
                             "Crawford Lambda Est",
                             "True Prob",
                             "Es Size", "Alt Es Size", "Crawford Es Size",
                             "Alt Accuracy", "Crawford Accuracy",
                             "Alt TPR", "Crawford TPR",
                             "Alt TNR", "Crawford TNR",
                             "Perf N Est", "Perf N MSE",
                             "Crawford N est", "Crawford N MSE",
                             "Alt N Est", "Alt N MSE",
                             "alpha", "beta",
                             "Full Perf N Est", "Full Perf N MSE",
                             "Full Crawford N est", "Full Crawford N MSE",
                             "Full Alt N Est", "Full Alt N MSE",
                             "Full Perf N Est SB", "Full Perf N MSE SB",
                             "Full Crawford N Est SB", "Full Crawford N MSE SB",
                             "Full Alt N Est SB", "Full Alt N MSE SB")

#Use true G_S in network regression and estimation of population size

saveRDS(results, paste0('RegbiasLambda_', taskID, '.rds')) 




# #initialize parameters
# current_sample <- rds$network_recruited
# mle <- mle_lambda(rds, rds$network_recruited, 1)
# current_log_likelihood <- -Inf
# 
# for(i in 1:small_sim){
#   #propose new graph - equally likely to propose subtraction and addition
#   proposal <- new_proposal_flip(rds, current_sample)
#   proposal <- proposal$new_sample
#   
#   #find lambda mle and determine likelihood at the mle
#   proposal_lambda_mle <- mle_lambda(rds, proposal, 1)
#   proposal_regress <- regression_mle(Y, proposal)
#   proposal_likelihood <- total_log_likelihood(rds, Y, proposal_regress, proposal, proposal_lambda_mle, 1)
#   
#   
#   if(as.matrix(proposal_likelihood)> as.matrix(current_log_likelihood)){
#     
#     #climb is proposal is better than current parameters
#     current_log_likelihood <- proposal_likelihood
#     current_sample <- proposal
#     mle <- proposal_lambda_mle
#     current_regress <- proposal_regress
#   } 
# }
# 
# given_mle <- as.numeric(mle)
# given_edges <- as.numeric(sum(current_sample))
# original_G_S_est <- current_sample
# regress <- current_regress
# 
# print("given_mle")
# print(given_mle)
# print("given_edges")
# print(given_edges)


# 
# 
# 
# alpha <- 1
# beta <- 1
# c <- 1
# 
# #estimate population size
# print(given_edges)
# degrees <- rds$degrees[rds$recruited]
# true_graph <- rds$true_network
# n <- as.numeric(dim(true_graph)[1])
# 
# full_true_est_N_2 <- max_posterior_N(alpha, beta, c, true_graph, degrees, n)
# 
# full_Craw_est_N_2 <- max_posterior_N(alpha, beta, c, original_G_S_est, degrees, n)
# 
# full_better_est_N_2 <- max_posterior_N(alpha, beta, c, true_G_S_est, degrees, n)
# 
# alpha <- 3000
# beta <- alpha*(1-p)/p
# c <- 1
# 
# #estimate population size
# print(given_edges)
# degrees <- rds$degrees[rds$recruited]
# true_graph <- rds$true_network
# n <- as.numeric(dim(true_graph)[1])
# 
# full_true_est_N_3 <- max_posterior_N(alpha, beta, c, true_graph, degrees, n)
# 
# full_Craw_est_N_3 <- max_posterior_N(alpha, beta, c, original_G_S_est, degrees, n)
# 
# full_better_est_N_3 <- max_posterior_N(alpha, beta, c, true_G_S_est, degrees, n)
# 
# 
# alpha <- 1
# beta <- 99
# c <- 0
# 
# #estimate population size
# print(given_edges)
# degrees <- rds$degrees[rds$recruited]
# true_graph <- rds$true_network
# n <- as.numeric(dim(true_graph)[1])
# 
# full_true_est_N_4 <- max_posterior_N(alpha, beta, c, true_graph, degrees, n)
# 
# full_Craw_est_N_4 <- max_posterior_N(alpha, beta, c, original_G_S_est, degrees, n)
# 
# full_better_est_N_4 <- max_posterior_N(alpha, beta, c, true_G_S_est, degrees, n)
# 
# 




# #Without fixed lambda
# 
# #initialize parameters
# current_sample <- rds$network_recruited
# mle <- mle_lambda(rds, rds$network_recruited, 1)
# current_log_likelihood <- -Inf
# 
# for(i in 1:small_sim){
#   #propose new graph - equally likely to propose subtraction and addition
#   proposal <- new_proposal_flip(rds, current_sample)
#   proposal <- proposal$new_sample
#   
#   #find lambda mle and determine likelihood at the mle
#   proposal_lambda_mle <- mle_lambda(rds, proposal, 1)
#   proposal_regress <- regression_mle(Y, proposal)
#   proposal_likelihood <- total_log_likelihood(rds, Y, proposal_regress, proposal, proposal_lambda_mle, 1)
#   
#   
#   if(as.matrix(proposal_likelihood)> as.matrix(current_log_likelihood)){
#     
#     #climb is proposal is better than current parameters
#     current_log_likelihood <- proposal_likelihood
#     current_sample <- proposal
#     mle <- proposal_lambda_mle
#     current_regress <- proposal_regress
#   }
# }
# lambda_reg <- mle
# given_mle_reg <- as.numeric(mle)
# given_edges_reg <- as.numeric(sum(current_sample))
# original_G_S_est_reg <- current_sample
# regress_reg <- current_regress
# 
# print("given_mle")
# print(given_mle_reg)
# print("given_edges")
# print(given_edges_reg)
# 
# 
# #Only regression 
# 
# #initialize parameters
# current_sample <- rds$network_recruited
# current_log_likelihood <- -Inf
# 
# for(i in 1:small_sim){
#   #propose new graph - equally likely to propose subtraction and addition
#   proposal <- new_proposal_flip(rds, current_sample)
#   proposal <- proposal$new_sample
#   
#   #find lambda mle and determine likelihood at the mle
#   proposal_regress <- regression_mle(Y, proposal)
#   proposal_likelihood <- log_likelihood_Beta_sigma(Y, proposal_regress$X_matrix, 
#                                                    proposal_regress$beta, 
#                                                    proposal_regress$sigma)
#   
#   
#   if(as.matrix(proposal_likelihood)> as.matrix(current_log_likelihood)){
#     
#     #climb is proposal is better than current parameters
#     current_log_likelihood <- proposal_likelihood
#     current_sample <- proposal
#     current_regress <- proposal_regress
#   }
# }
# 
# 
# given_edges_Onlyreg <- as.numeric(sum(current_sample))
# original_G_S_est_Onlyreg <- current_sample
# regress_Onlyreg <- current_regress
# 
# print("given_edges")
# print(given_edges_Onlyreg)

