# Fit one prediction model on (x.train, y.train) and predict on two sets of
# new rows. Private helper of regression_adjustment().
#
# Arguments:
#   model      one of "gradient_boosting", "logistic_regression", "lasso",
#              "random_forest", "regression_forest", "ols"
#   x.train    covariate matrix used for fitting
#   y.train    outcome vector used for fitting (the classification-type models
#              below are fitted as binary models)
#   x.new.sub  covariate matrix of the sub-sample rows to predict
#   x.new.all  covariate matrix of the full-sample rows to predict
#   rf.ntree   number of trees for the "random_forest" model
# Returns:
#   list(sub = predictions for x.new.sub, all = predictions for x.new.all),
#   each a plain numeric vector (length 0 if the corresponding matrix is empty).
.ra_fit_predict = function(model, x.train, y.train, x.new.sub, x.new.all, rf.ntree){

  if (nrow(x.train) == 0){
    stop("regression_adjustment: no observations left to fit the model", call. = FALSE)
  }

  if (model == "gradient_boosting"){
    bst = xgboost(data = x.train, label = y.train,
                  max_depth = 3, eta = 0.1, nrounds = 300, verbose = 0,
                  objective = "binary:logistic")
    predict.fun = function(x.new) predict(bst, x.new)

  } else if (model == "logistic_regression"){
    # fastglm needs the intercept column to be supplied explicitly
    logit = fastglm(x = cbind(1, x.train),
                    y = y.train,
                    family = binomial(link = "logit"))
    predict.fun = function(x.new) predict(logit, cbind(1, x.new), type = "response")

  } else if (model == "lasso"){
    cvlasso = cv.glmnet(x.train, y.train, alpha = 1, family = "binomial", parallel = TRUE)
    predict.fun = function(x.new) predict(cvlasso, x.new, s = cvlasso$lambda.min, type = "response")

  } else if (model == "random_forest"){
    rf = randomForest(x.train, as.factor(y.train), ntree = rf.ntree)
    predict.fun = function(x.new){
      # Ask for class probabilities: the default predict() returns a factor,
      # which would be stored as its integer codes (1/2) in a numeric matrix.
      prob = predict(rf, x.new, type = "prob")
      if ("1" %in% colnames(prob)) prob[, "1"] else rep(0, nrow(x.new))
    }

  } else if (model == "regression_forest"){
    regf = regression_forest(x.train, y.train, tune.parameters = "all")
    # predict() on a grf forest returns a data frame; keep the point predictions
    predict.fun = function(x.new) predict(regf, x.new)$predictions

  } else if (model == "ols"){
    # Use the training column names for every data frame so that lm() and
    # predict() always agree, even when the matrices carry no column names.
    x.names = colnames(x.train)
    if (is.null(x.names)) x.names = paste0("V", seq_len(ncol(x.train)))
    as.x.df = function(x){
      x.df = as.data.frame(x)
      colnames(x.df) = x.names
      x.df
    }
    data_train = cbind(data.frame(y = y.train), as.x.df(x.train))
    ols = lm(y ~ ., data = data_train)
    predict.fun = function(x.new) predict(ols, as.x.df(x.new))

  } else {
    stop("regression_adjustment: unknown model '", model, "'", call. = FALSE)
  }

  # Skip the model call for empty prediction sets and strip names/dimensions
  predict.or.empty = function(x.new){
    if (nrow(x.new) == 0) return(numeric(0))
    as.vector(predict.fun(x.new))
  }

  list(sub = predict.or.empty(x.new.sub), all = predict.or.empty(x.new.all))
}


# Predict every outcome column from covariates, fitting on a sub-sample and
# predicting on both the sub-sample and the full sample.
#
# Arguments:
#   outcome        matrix with one column per element of vec.loc and one row per
#                  row of sub.x.s
#   vec.loc        vector of locations; only its length is used (number of
#                  outcome columns to process)
#   sub.x.s        covariate matrix of the sub-sample used for fitting
#   sub.folds.s    fold id of each row of sub.x.s (used only if cross_fitting == 1)
#   mat.x.s        covariate matrix of the full sample
#   folds.s        fold id of each row of mat.x.s (used only if cross_fitting == 1)
#   model          "gradient_boosting", "logistic_regression", "lasso",
#                  "random_forest", "regression_forest" or "ols"
#   cross_fitting  1: for every fold f, fit on sub-sample rows outside f and
#                     predict the rows inside f;
#                  0: fit once on the whole sub-sample and predict all rows
# Returns:
#   list(mu.sub, mu.all): prediction matrices with nrow(sub.x.s) and
#   nrow(mat.x.s) rows respectively and length(vec.loc) columns.
regression_adjustment = function(outcome, vec.loc, sub.x.s, sub.folds.s, mat.x.s, folds.s, model, cross_fitting){

  ##--------------
  ## checks
  ##--------------
  known.models = c("gradient_boosting", "logistic_regression", "lasso",
                   "random_forest", "regression_forest", "ols")
  if (!(length(model) == 1 && model %in% known.models)){
    stop("regression_adjustment: unknown model '", paste(model, collapse = ", "), "'", call. = FALSE)
  }
  if (!(length(cross_fitting) == 1 && cross_fitting %in% c(0, 1))){
    stop("regression_adjustment: cross_fitting must be 0 or 1", call. = FALSE)
  }

  ##--------------
  ## setup
  ##--------------
  outcome = as.matrix(outcome)
  sub.x.s = as.matrix(sub.x.s)
  mat.x.s = as.matrix(mat.x.s)
  n.loc   = length(vec.loc)    ## # of locations

  # One row per observation (nrow, not length: length() of a matrix is n * p)
  mu.sub = matrix(NA_real_, nrow(sub.x.s), n.loc) # Predicted means in sub sample (treatment or control) within stratum s
  mu.all = matrix(NA_real_, nrow(mat.x.s), n.loc) # Predicted means in full sample within stratum s

  if (cross_fitting == 1){
    sub.fold.id = as.vector(sub.folds.s)
    all.fold.id = as.vector(folds.s)
    # Take the folds from the data rather than from a global counter
    fold.values = sort(unique(all.fold.id))

    for (j in seq_len(n.loc)){
      y = outcome[, j] ## jth column - outcome
      for (f in fold.values){
        in.train = sub.fold.id != f
        sub.test = sub.fold.id == f
        all.test = all.fold.id == f
        # drop = FALSE keeps matrices intact for single-row / single-column cases
        fit = .ra_fit_predict(model,
                              sub.x.s[in.train, , drop = FALSE], y[in.train],
                              sub.x.s[sub.test, , drop = FALSE],
                              mat.x.s[all.test, , drop = FALSE],
                              rf.ntree = 500)
        mu.sub[sub.test, j] = fit$sub
        mu.all[all.test, j] = fit$all
      }
    }
  } else {
    for (j in seq_len(n.loc)){
      fit = .ra_fit_predict(model, sub.x.s, outcome[, j], sub.x.s, mat.x.s,
                            rf.ntree = 100)
      mu.sub[, j] = fit$sub
      mu.all[, j] = fit$all
    }
  }

  return(list(mu.sub = mu.sub, mu.all = mu.all))
}


# Numerators and denominators of the local distributional treatment effect,
# with and without regression adjustment, aggregated over strata.
#
# Arguments:
#   df             data frame with columns S (stratum), Z, Y, D, folds and the
#                  covariates named in `covariate_names`
#   vec.loc        locations y at which the indicator 1{Y <= y} is evaluated
#   model          prediction model passed on to regression_adjustment()
#   cross_fitting  1 / 0, passed on to regression_adjustment()
# Note: `covariate_names` is not an argument; it is looked up in the calling
# (global) environment.
# Returns:
#   list(numerator, denominator, numerator.ra, denominator.ra); every element
#   is a sum over strata of (stratum share) * (Z = 1 minus Z = 0 difference).
#   The numerators have one entry per element of vec.loc.
local.DTE.ML.estimation = function(df, vec.loc,  model, cross_fitting){

  # containers
  numerator = 0
  numerator.ra = 0
  denominator = 0
  denominator.ra = 0

  vec.s = df$S

  # Loop over the strata that actually occur in the data
  for (s in sort(unique(vec.s))) {

    # Share of stratum s in the sample (named pi.s so base `pi` is not masked)
    pi.s = mean(vec.s==s)

    # Data with S = s, then split by Z
    df.s = df %>% filter(S==s)
    df.1.s = df.s %>% filter(Z==1)
    df.0.s = df.s %>% filter(Z==0)

    # Covariate matrices (drop = FALSE keeps a matrix with a single covariate)
    x.s   = as.matrix(df.s[ , covariate_names, drop = FALSE])
    x.1.s = as.matrix(df.1.s[ , covariate_names, drop = FALSE])
    x.0.s = as.matrix(df.0.s[ , covariate_names, drop = FALSE])

    ### NUMERATOR -- second stage
    # Indicators 1{Y <= y}, one column per location
    mu.1.s = 1 * outer(df.1.s$Y, vec.loc, "<=")
    mu.0.s = 1 * outer(df.0.s$Y, vec.loc, "<=")

    # Unadjusted numerator
    numerator_temp = pi.s*(colMeans(mu.1.s) - colMeans(mu.0.s))
    numerator = numerator + numerator_temp # Sum over all S

    # Regression adjustment for CDFs: predict the indicators from X within each stratum (separately for Z=0 and Z=1)
    mu.reg.adj.1 = regression_adjustment(mu.1.s, vec.loc, x.1.s, df.1.s$folds, x.s, df.s$folds, model, cross_fitting)
    mu.reg.adj.0 = regression_adjustment(mu.0.s, vec.loc, x.0.s, df.0.s$folds, x.s, df.s$folds, model, cross_fitting)

    # Adjusted numerator
    numerator_temp.ra = pi.s*(colMeans(mu.1.s) - colMeans(mu.reg.adj.1$mu.sub) + colMeans(mu.reg.adj.1$mu.all)-
                               (colMeans(mu.0.s)- colMeans(mu.reg.adj.0$mu.sub) + colMeans(mu.reg.adj.0$mu.all)))

    numerator.ra = numerator.ra + numerator_temp.ra # Sum over all S

    ### DENOMINATOR -- first stage
    d.1.s = as.matrix(df.1.s$D)
    d.0.s = as.matrix(df.0.s$D)

    # Unadjusted denominator: weighted and accumulated like the other three
    # quantities (it used to be overwritten by the last stratum's difference)
    denominator_temp = pi.s*(colMeans(d.1.s) - colMeans(d.0.s))
    denominator = denominator + denominator_temp # Sum over all S

    # Regression adjustment for the denominator: predict D from X within each stratum (separately for Z=0 and Z=1)
    denom.reg.adj.1 = regression_adjustment(d.1.s, 1, x.1.s, df.1.s$folds, x.s, df.s$folds, model, cross_fitting)
    denom.reg.adj.0 = regression_adjustment(d.0.s, 1, x.0.s, df.0.s$folds, x.s, df.s$folds, model, cross_fitting)

    # Adjusted denominator
    denominator_temp.ra = pi.s*(colMeans(d.1.s) - colMeans(denom.reg.adj.1$mu.sub) + colMeans(denom.reg.adj.1$mu.all)-
                                 (colMeans(d.0.s) - colMeans(denom.reg.adj.0$mu.sub) + colMeans(denom.reg.adj.0$mu.all)))

    denominator.ra = denominator.ra + denominator_temp.ra # Sum over all S

  }
  return(list(numerator = numerator,
              denominator = denominator,
              numerator.ra = numerator.ra,
              denominator.ra = denominator.ra))

}


# Numerators and denominators of the local probability treatment effect,
# with and without regression adjustment, aggregated over strata.
#
# Arguments:
#   df             data frame with columns S (stratum), Z, Y, D, folds and the
#                  covariates named in `covariate_names`
#   vec.loc.up     upper interval limits
#   vec.loc.low    lower interval limits (same length as vec.loc.up); the
#                  outcome indicator is 1{vec.loc.low < Y <= vec.loc.up}
#   model          prediction model passed on to regression_adjustment()
#   cross_fitting  1 / 0, passed on to regression_adjustment()
# Note: `covariate_names` is not an argument; it is looked up in the calling
# (global) environment.
# Returns:
#   list(numerator, denominator, numerator.ra, denominator.ra); every element
#   is a sum over strata of (stratum share) * (Z = 1 minus Z = 0 difference).
#   The numerators have one entry per interval.
local.PTE.ML.estimation = function(df, vec.loc.up, vec.loc.low, model, cross_fitting){

  if (length(vec.loc.up) != length(vec.loc.low)) {
    stop("local.PTE.ML.estimation: vec.loc.up and vec.loc.low must have the same length", call. = FALSE)
  }

  # containers
  numerator = 0
  numerator.ra = 0
  denominator = 0
  denominator.ra = 0

  vec.s = df$S

  # Loop over the strata that actually occur in the data
  for (s in sort(unique(vec.s))) {

    # Share of stratum s in the sample (named pi.s so base `pi` is not masked)
    pi.s = mean(vec.s==s)

    # Data with S = s, then split by Z
    df.s = df %>% filter(S==s)
    df.1.s = df.s %>% filter(Z==1)
    df.0.s = df.s %>% filter(Z==0)

    # Covariate matrices (drop = FALSE keeps a matrix with a single covariate)
    x.s   = as.matrix(df.s[ , covariate_names, drop = FALSE])
    x.1.s = as.matrix(df.1.s[ , covariate_names, drop = FALSE])
    x.0.s = as.matrix(df.0.s[ , covariate_names, drop = FALSE])

    ### NUMERATOR -- second stage
    # Interval indicators 1{low < Y <= up}, one column per interval
    mu.1.s.up = 1 * outer(df.1.s$Y, vec.loc.up, "<=")  ## n x n.loc
    mu.1.s.low = 1 * outer(df.1.s$Y, vec.loc.low, ">")  ## n x n.loc
    mu.1.s = mu.1.s.up * mu.1.s.low

    mu.0.s.up = 1 * outer(df.0.s$Y, vec.loc.up, "<=")  ## n x n.loc
    mu.0.s.low = 1 * outer(df.0.s$Y, vec.loc.low, ">")  ## n x n.loc
    mu.0.s = mu.0.s.up * mu.0.s.low

    # Unadjusted numerator
    numerator_temp = pi.s*(colMeans(mu.1.s) - colMeans(mu.0.s))
    numerator = numerator + numerator_temp # Sum over all S

    # Regression adjustment: predict the indicators from X within each stratum (separately for Z=0 and Z=1).
    # vec.loc.up is passed as the location vector: regression_adjustment() only
    # uses its length, and `vec.loc` is not an argument of this function.
    mu.reg.adj.1 = regression_adjustment(mu.1.s, vec.loc.up, x.1.s, df.1.s$folds, x.s, df.s$folds, model, cross_fitting)
    mu.reg.adj.0 = regression_adjustment(mu.0.s, vec.loc.up, x.0.s, df.0.s$folds, x.s, df.s$folds, model, cross_fitting)

    # Adjusted numerator
    numerator_temp.ra = pi.s*(colMeans(mu.1.s) - colMeans(mu.reg.adj.1$mu.sub) + colMeans(mu.reg.adj.1$mu.all)-
                               (colMeans(mu.0.s)- colMeans(mu.reg.adj.0$mu.sub) + colMeans(mu.reg.adj.0$mu.all)))

    numerator.ra = numerator.ra + numerator_temp.ra # Sum over all S

    ### DENOMINATOR -- first stage
    d.1.s = as.matrix(df.1.s$D)
    d.0.s = as.matrix(df.0.s$D)

    # Unadjusted denominator: weighted and accumulated like the other three
    # quantities (it used to be overwritten by the last stratum's difference)
    denominator_temp = pi.s*(colMeans(d.1.s) - colMeans(d.0.s))
    denominator = denominator + denominator_temp # Sum over all S

    # Regression adjustment for the denominator: predict D from X within each stratum (separately for Z=0 and Z=1)
    denom.reg.adj.1 = regression_adjustment(d.1.s, 1, x.1.s, df.1.s$folds, x.s, df.s$folds, model, cross_fitting)
    denom.reg.adj.0 = regression_adjustment(d.0.s, 1, x.0.s, df.0.s$folds, x.s, df.s$folds, model, cross_fitting)

    # Adjusted denominator
    denominator_temp.ra = pi.s*(colMeans(d.1.s) - colMeans(denom.reg.adj.1$mu.sub) + colMeans(denom.reg.adj.1$mu.all)-
                                 (colMeans(d.0.s) - colMeans(denom.reg.adj.0$mu.sub) + colMeans(denom.reg.adj.0$mu.all)))

    denominator.ra = denominator.ra + denominator_temp.ra # Sum over all S

  }
  return(list(numerator = numerator,
              denominator = denominator,
              numerator.ra = numerator.ra,
              denominator.ra = denominator.ra))

}






# local.DTE.bootstrap.se = function(df, vec.loc, model, cross_fitting, covariate_names, B.size){
#   
#   # Containers for bootstrap estimates
#   ldte_bootstrap = numeric(B.size)
#   ldte_ra_bootstrap = numeric(B.size)
#   
#   # sample size
#   num_obs = nrow(df)
#   
#   # Bootstrap procedure
#   for (i in 1:B.size) {
#     # Resample the data with replacement
#     df_bootstrap = df[sample(1:nrow(df), replace = TRUE), ]
#     
#     # Compute local DTE and adjusted DTE for the bootstrap sample
#     dte_estimates = local.DTE.ML.estimation(df_bootstrap, vec.loc, model, cross_fitting)
#     
#     # Store the estimates
#     ldte_bootstrap[i] = dte_estimates$ldte
#     ldte_ra_bootstrap[i] = dte_estimates$ldte.ra
#   }
#   
#   # Calculate standard errors as the standard deviation of the bootstrap estimates (divided by sqrt of n)
#   ldte_se = sd(ldte_bootstrap)/sqrt(num_obs)
#   ldte_ra_se = sd(ldte_ra_bootstrap)/sqrt(num_obs)
#   
#   return(list(ldte_se = ldte_se, ldte_ra_se = ldte_ra_se))
# 
# }




# One bootstrap replication of the local DTE; meant to be mapped over `sim`
# (e.g. by a parallel apply).
#
# Arguments:
#   sim   replication id, used only in the names of the output files
# Reads `df` and `vec.loc` from the enclosing (global) environment.
# Side effect: writes ./result/oregon/ldte_<sim>.rds and
#   ./result/oregon/ldte.ra_<sim>.rds.
# Returns:
#   list(ldte, ldte.ra): unadjusted and regression-adjusted ratio
#   numerator / denominator for the resampled data.
local.DTE.bootstrap = function(sim){

  # Resample with replacement within each stratum (not over the pooled data),
  # so every stratum keeps its original size
  df_bootstrap = do.call(rbind, lapply(split(df, df$S), function(stratum) {
    stratum[sample(nrow(stratum), size = nrow(stratum), replace = TRUE), ]
  }))

  # Compute local DTE and adjusted DTE for the bootstrap sample
  ldte_estimates = local.DTE.ML.estimation(df_bootstrap, vec.loc, "gradient_boosting", 1)

  ldte = ldte_estimates$numerator/ldte_estimates$denominator
  ldte.ra = ldte_estimates$numerator.ra/ldte_estimates$denominator.ra

  # Make sure the output directory exists so the estimates are not lost
  # after the expensive model fitting
  out.dir = "./result/oregon"
  dir.create(out.dir, recursive = TRUE, showWarnings = FALSE)

  saveRDS(ldte, file=paste0(out.dir, "/ldte_", sim, ".rds"))
  saveRDS(ldte.ra, file=paste0(out.dir, "/ldte.ra_", sim, ".rds"))

  # return
  return(list(ldte = ldte, ldte.ra = ldte.ra))

}

# One bootstrap replication of the local PTE; meant to be mapped over `sim`
# (e.g. by a parallel apply).
#
# Arguments:
#   sim   replication id, used only in the names of the output files
# Reads `df`, `vec.loc.up` and `vec.loc.low` from the enclosing (global)
# environment.
# Side effect: writes ./result/oregon/lpte_<sim>.rds and
#   ./result/oregon/lpte.ra_<sim>.rds.
# Returns:
#   list(lpte, lpte.ra): unadjusted and regression-adjusted ratio
#   numerator / denominator for the resampled data.
local.PTE.bootstrap = function(sim){

  # Resample with replacement within each stratum (not over the pooled data),
  # so every stratum keeps its original size
  df_bootstrap = do.call(rbind, lapply(split(df, df$S), function(stratum) {
    stratum[sample(nrow(stratum), size = nrow(stratum), replace = TRUE), ]
  }))

  # Compute local PTE and adjusted PTE for the bootstrap sample
  lpte_estimates = local.PTE.ML.estimation(df_bootstrap, vec.loc.up, vec.loc.low, "gradient_boosting", 1)

  lpte = lpte_estimates$numerator/lpte_estimates$denominator
  lpte.ra = lpte_estimates$numerator.ra/lpte_estimates$denominator.ra

  # Make sure the output directory exists so the estimates are not lost
  # after the expensive model fitting
  out.dir = "./result/oregon"
  dir.create(out.dir, recursive = TRUE, showWarnings = FALSE)

  # Save the LPTE estimates computed above (not the `ldte` objects, which are
  # not defined in this function)
  saveRDS(lpte, file=paste0(out.dir, "/lpte_", sim, ".rds"))
  saveRDS(lpte.ra, file=paste0(out.dir, "/lpte.ra_", sim, ".rds"))

  # return
  return(list(lpte = lpte, lpte.ra = lpte.ra))

}

