rm(list=ls())

library(Rcpp)
library(RcppEigen)
library(RcppParallel)

# Build the compiler flags used by sourceCpp() below: add the header
# directories of Rcpp, RcppEigen and RcppParallel, followed by each package's
# own CxxFlags() output, and export them through PKG_CPPFLAGS.
Sys.setenv("PKG_CPPFLAGS" = paste0(
  "-I", system.file("include", package="Rcpp"), " ",
  "-I", system.file("include", package="RcppEigen"), " ",
  "-I", system.file("include", package="RcppParallel"), " ",
  # NOTE(review): `:::` reaches into RcppEigen's non-exported namespace.
  RcppEigen:::CxxFlags(), " ",
  RcppParallel::CxxFlags()
))

# Compile and load the C++ helpers. The helpers called later in this script
# but not defined in it (kernels, get_pre_y, generate_kx,
# fast_vector_operations) presumably come from this file -- TODO confirm.
# NOTE(review): the path is absolute at the filesystem root.
sourceCpp("/kernel_function_.cpp")

library(tidyverse)
library(magrittr)
library(stringr)
# Paragraph style names used by addtitle() for title levels 1-3.
tit<-c("heading 1","heading 2","heading 3")
# library(officer);

# Append a formatted title to a Word document, followed by an empty
# "Normal" paragraph.
#
# Arguments:
#   name  Text of the title.
#   num   Title level, 1-4. Levels 1-3 use font sizes 18/16/14 and the
#         paragraph styles listed in the global `tit`; level 4 uses font size
#         14 and no explicit paragraph style.
#   doc   Document object that body_add() / body_add_par() accept.
#
# Returns the updated document object.
#
# NOTE(review): fp_text(), ftext(), fpar(), fp_par(), body_add() and
# body_add_par() are not defined in this file and library(officer) is
# commented out above, so the providing package must be attached by the
# caller before this function is used.
addtitle<-function(name,num,doc){
  # Fail early: an out-of-range level would otherwise index NA out of the
  # font-size and style tables below.
  if (!is.numeric(num) || length(num) != 1L || is.na(num) ||
      num < 1 || num >= 5) {
    stop("`num` must be a single number between 1 and 4", call. = FALSE)
  }

  font_sizes <- c(18, 16, 14, 14)
  title_props <- fp_text(color = 'black', font.size = font_sizes[num],
                         font.family = 'ST')
  title_par <- fpar(ftext(name, prop = title_props),
                    fp_p = fp_par(text.align = "justify"))

  if (num != 4) {
    doc <- body_add(doc, title_par, style = tit[num], pos = "after") %>%
      body_add_par("", style = 'Normal')
  } else {
    doc <- body_add(doc, title_par, pos = "after") %>%
      body_add_par("", style = 'Normal')
  }

  doc
}

# doc<-read_docx()
#####continus#####
# Monte Carlo bookkeeping shared by all replications:
#   time_       replication counter (starts at 1)
#   data_save_  one row of summary estimates per replication
#   data_cov    one row of (coverage, mse, interval width) per replication
#   list_save   per-replication data frame of predictions and intervals
# NOTE(review): the loop below runs while time_ < 200, i.e. 199 replications,
# although the output file name ends in "_200" -- confirm the intended count.
time_ <- 1
data_save_ <- NULL
data_cov <- NULL
list_save <- vector("list", 0L)

while(time_<200){
  # ---- Simulate one data set -------------------------------------------
  # Sample size and two independent Uniform(0, 1) covariates.
  n<-2000
  X1<-runif(n)
  X2<-runif(n)
  
  # Latent binary variable A_ (called A* in the comments below), drawn with
  # P(A* = 1 | X) = 1 / (1 + exp(X1) + exp(X2)).
  prop<-1/(1+exp(X1)+exp(X2))
  A_<- rbinom(n,size = 1, prob = prop)
  
  # Compute pr(A = 1 | A* = 1, X) and pr(A = 1 | A* = 0, X)
  pr_A_1_given_A_1_X <- plogis(1 + 2 * X1)
  
  pr_A_1_given_A_0_X <- plogis(-1 + 0.5 * X1 - X2)
  
  # Observed binary A: drawn from the first model where A* = 1 and from the
  # second where A* = 0. ifelse() generates a full length-n draw for each
  # branch and then picks elementwise, so the order of these calls fixes the
  # random-number stream.
  A <- ifelse(A_ == 1, 
              rbinom(n, size = 1, prob = pr_A_1_given_A_1_X), 
              rbinom(n, size = 1, prob = pr_A_1_given_A_0_X))
  # Calculate pr(Z = 1 | A* = 1, X) and pr(Z = 1 | A* = 0, X)
  pr_Z_1_given_A_1_X <- plogis(-1 - X1 + 0.5 * X2)
  
  
  pr_Z_1_given_A_0_X <- plogis(2 + X1)
  
  # Second binary variable Z, generated from A* in the same way as A.
  Z <- ifelse(A_ == 1, 
              rbinom(n, size = 1, prob = pr_Z_1_given_A_1_X), 
              rbinom(n, size = 1, prob = pr_Z_1_given_A_0_X))
  
  # Calculate pr(Y = 1 | A* = 1, X) and pr(Y = 1 | A* = 0, X).
  # These two vectors are only referenced by the commented-out binary-outcome
  # draw below; the active outcome is the continuous Y further down.
  pr_Y_1_given_A_1 <- plogis(-2 + 0.5 * X1 + X2)
  
  pr_Y_1_given_A_0 <- plogis(1 - 2 * X1 + 0.5 * X2)
  

  # Y <- ifelse(A_ == 1, 
  #             rbinom(n, size = 1, prob = pr_Y_1_given_A_1), 
  #             rbinom(n, size = 1, prob = pr_Y_1_given_A_0))
  # Continuous outcome with standard normal noise. The difference between
  # A* = 1 and A* = 0 is (X1 + X2), which is what `truth_Y` is set to later.
  # NOTE(review): the literal 3.1415926 is used rather than `pi`; the name
  # `pi` is reassigned to a probability vector later in this loop body, so the
  # built-in constant would not be available from the second replication on.
  sgm<-rnorm(n)
  Y <- sin(3.1415926*X1)+(A_-0.5)*(X1+X2)+sgm
  # library(gplm)
  # Collect everything in one data frame; A_star keeps the latent value.
  data <- data.frame( X1 = X1, X2 = X2, Y = Y,Z=Z,A=A,A_star = A_)
  # Force the modelling columns to plain numeric (A comes out of rbinom()).
  data[,c('A','X1','X2','Y')]<-lapply(data[,c('A','X1','X2','Y')],as.numeric)
  # Starting values, set 1: random per-observation initial probabilities for
  # the iterative procedure further down. The A | A* = 1 and Z | A* = 0
  # starting values are drawn from (0.5, 1); the A | A* = 0 and Z | A* = 1
  # ones from (0, 0.5). Each "_0_" companion is one minus its "_1_" partner.
  p0_A_star_1_given_X=runif(n);p0_A_star_0_given_X=1-p0_A_star_1_given_X
  p0_Y_given_A_star_1_X=runif(n);p0_Y_given_A_star_0_X=runif(n)
  p0_A_1_given_A_star_1_X=runif(n,0.5,1);p0_A_0_given_A_star_1_X=1-p0_A_1_given_A_star_1_X
  p0_A_1_given_A_star_0_X=runif(n,0,0.5);p0_A_0_given_A_star_0_X=1-p0_A_1_given_A_star_0_X
  p0_Z_1_given_A_star_1_X=runif(n,0,0.5);p0_Z_0_given_A_star_1_X=1-p0_Z_1_given_A_star_1_X
  p0_Z_1_given_A_star_0_X=runif(n,0.5,1);p0_Z_0_given_A_star_0_X=1-p0_Z_1_given_A_star_0_X
  
  # ---- Kernel estimates of P(A = 1 | X) and P(Z = 1 | X) ----------------
  # Training covariates (x) and evaluation points (kx) are both the observed
  # (X1, X2) pairs.
  x <- data[, c("X1", "X2")]
  kx <- x

  # --- A given X ---
  a <- data[["A"]]
  A_one <- 1
  A_a <- data.frame(A_a = A_one - A)
  # Kernel weights of (1 - A); K, Ksum and pa are computed but not used by the
  # active code further down.
  K <- kernels(A_a)
  Ksum <- sum(K)
  pa <- K / max(K)
  # get_pre_y() is a compiled helper; the fitted value is read from the last
  # column of what it returns.
  kernel_y <- get_pre_y(as.matrix(x), a, as.matrix(kx))
  colnames(kernel_y) <- c(colnames(x), "y_")
  pr_A_1_given_X <- unlist(kernel_y[, ncol(kernel_y)])
  pr_A_0_given_X <- 1 - pr_A_1_given_X
  # Probability of the value of A that was actually observed.
  pr_A_given_X <- ifelse(A == 1, pr_A_1_given_X, pr_A_0_given_X)

  # --- Z given X (same steps as for A) ---
  z <- data[["Z"]]
  Z_one <- 1
  Z_a <- data.frame(Z_a = Z_one - Z)
  K <- kernels(Z_a)
  Ksum <- sum(K)
  pz <- K / max(K)
  kernel_y <- get_pre_y(as.matrix(x), z, as.matrix(kx))
  colnames(kernel_y) <- c(colnames(x), "y_")
  pr_Z_1_given_X <- unlist(kernel_y[, ncol(kernel_y)])
  pr_Z_0_given_X <- 1 - pr_Z_1_given_X
  pr_Z_given_X <- ifelse(Z == 1, pr_Z_1_given_X, pr_Z_0_given_X)
  # Y=data[,c('Y')]
  # x<-kx<- data[,c('X1','X2')]
  # pr_Y_given_X<-c()
  # for(x_estimate in 1:length(Y)){
  #   # X_x <-  x_estimate - x
  #   if(x_estimate%%50==0) print(x_estimate)
  #   Y_estimate=Y[x_estimate]
  #   Y_y<-Y_estimate-Y
  #   Y_y<-as.data.frame(Y_y)
  #   K <-kernels(Y_y)
  #   # Ksum <- sum(K)
  #   kxx<-K/max(K)
  #   results <-get_pre_y(as.matrix(x),kxx,as.matrix(kx))
  #   colnames(kernel_y)<-c(colnames(x),'y_')
  #   
  #   # final <- do.call('c',results)
  #   results_<-c()
  #   results_<-rbind(results_,results)
  #   
  #   pr_Y_given_X<-c(pr_Y_given_X,unlist(results_[,ncol(results_)]))
  #   
  # }
  # pr_Y_given_X_save<-pr_Y_given_X
  # pp<-c()
  # for(i in 1:nrow(data)){
  #   print(sum(pr_Y_given_X_save[seq(i,length(pr_Y_given_X_save),nrow(data))]))
  #   pp<-c(pp,sum(pr_Y_given_X_save[seq(i,length(pr_Y_given_X_save),nrow(data))]))
  #   # pr_Y_given_X_save[seq(i,length(pr_Y_given_X_save),nrow(data))]<-pr_Y_given_X_save[seq(i,length(pr_Y_given_X_save),nrow(data))]/sum(pr_Y_given_X_save[seq(i,length(pr_Y_given_X_save),nrow(data))])
  # }
  # pr_Y_given_X_save<-pr_Y_given_X_save/mean(pp)
  # pr_Y_given_X__<-c()
  # for(x_estimate in 1:length(Y)){
  #   pr_Y_given_X_<-pr_Y_given_X_save[((x_estimate-1)*n+1):((x_estimate)*n)]
  #   pr_Y_given_X_<-pr_Y_given_X_[x_estimate]
  #   print(pr_Y_given_X_);print(max(pr_Y_given_X_))
  #   pr_Y_given_X__<-c(pr_Y_given_X__,pr_Y_given_X_)
  # }
  # 
  # pr_Y_given_X<-pr_Y_given_X__
  #####new cal for py #####
  # ---- Kernel estimate of the conditional distribution of Y given X ------
  # For every observed outcome value Y[i], regress the indicator 1{Y == Y[i]}
  # on (X1, X2) and evaluate the fit at all observed covariate points. The
  # fits are stacked into one long vector: block i (n consecutive entries)
  # belongs to Y[i], position j inside a block to covariate point j.
  # Assumes get_pre_y() returns one row per evaluation point with the fitted
  # value in its last column -- TODO confirm against the C++ source.
  Y <- data[, c("Y")]
  x <- kx <- data[, c("X1", "X2")]
  n_outcomes <- length(Y)

  # Collect the per-outcome fits in a list and flatten once at the end;
  # appending to a vector inside the loop re-copies the whole result on every
  # iteration.
  fit_blocks <- vector("list", n_outcomes)
  for (x_estimate in seq_len(n_outcomes)) {
    if (x_estimate %% 2000 == 0) print(x_estimate)
    Y_estimate <- Y[x_estimate]
    Y_y <- Y_estimate - Y
    Y_y <- as.data.frame(Y_y)
    # Indicator of observations whose outcome equals Y[x_estimate] exactly.
    kxx <- ifelse(Y_y == 0, 1, 0)
    # as.matrix() is called per fit so the compiled routine always receives
    # freshly created matrices, exactly as before.
    results <- get_pre_y(as.matrix(x), kxx, as.matrix(kx))
    fit_blocks[[x_estimate]] <- unlist(results[, ncol(results)])
  }
  pr_Y_given_X <- unlist(fit_blocks)

  # Normalise: for each covariate point sum the fits over all outcome values,
  # then divide everything by the average of those sums.
  pr_Y_given_X_save <- pr_Y_given_X
  n_points <- nrow(data)
  n_fitted <- length(pr_Y_given_X_save)
  pp <- vapply(
    seq_len(n_points),
    function(i) sum(pr_Y_given_X_save[seq(i, n_fitted, n_points)]),
    numeric(1)
  )
  pr_Y_given_X_save <- pr_Y_given_X_save / mean(pp)

  # Keep, for each observation i, the entry of block i at position i, i.e.
  # the normalised fit for its own outcome at its own covariates.
  own_position <- (seq_len(n_outcomes) - 1) * n + seq_len(n_outcomes)
  pr_Y_given_X <- pr_Y_given_X_save[own_position]
  ##### #####  
  # ---- Fixed-point iteration for the latent-variable model ----------------
  # Starting from the random initial values drawn above, alternate between
  #   (1) the per-observation posterior probability `pi` of A* = 1, and
  #   (2) kernel re-estimates of the component probabilities given A*,
  # until the largest change in `pi` between two sweeps is at most 0.001.
  # NOTE(review): `pi` shadows R's built-in constant from here on.
  pr_A_S_1_given_X <- p0_A_star_1_given_X
  pr_A_S_0_given_X <- p0_A_star_0_given_X
  pr_Y_given_A_star_1_X <- p0_Y_given_A_star_1_X
  pr_Y_given_A_star_0_X <- p0_Y_given_A_star_0_X
  # Probabilities of the observed A / Z values under A* = 1 and A* = 0.
  pr_A_given_A_star_1_X <- ifelse(A == 1, p0_A_1_given_A_star_1_X, p0_A_0_given_A_star_1_X)
  pr_Z_given_A_star_1_X <- ifelse(Z == 1, p0_Z_1_given_A_star_1_X, p0_Z_0_given_A_star_1_X)
  pr_A_given_A_star_0_X <- ifelse(A == 1, p0_A_1_given_A_star_0_X, p0_A_0_given_A_star_0_X)
  pr_Z_given_A_star_0_X <- ifelse(Z == 1, p0_Z_1_given_A_star_0_X, p0_Z_0_given_A_star_0_X)

  # Loop-invariant pieces: the row masks and covariate subsets do not change
  # between sweeps, so they are built once here.
  em_rows_a0 <- data$A == 0
  em_rows_z1 <- data$Z == 1
  em_x_all <- data[, c("X1", "X2")]
  em_yx_all <- data[, c("Y", "X1", "X2")]
  em_x_a0 <- data[em_rows_a0, c("X1", "X2")]
  em_x_z1 <- data[em_rows_z1, c("X1", "X2")]

  # Kernel-regress `response` on `train` and return the fitted values at
  # `eval_at` (last column of the get_pre_y() result). as.matrix() is applied
  # per call so the compiled routine always receives fresh matrices.
  em_kernel_fit <- function(train, response, eval_at) {
    fit <- get_pre_y(as.matrix(train), response, as.matrix(eval_at))
    unlist(fit[, ncol(fit)])
  }

  pi_save <- 0
  t <- 1
  o <- 1  # sweep counter (kept for debugging; not used for stopping)
  while (t > 0.001) {
    o <- o + 1

    # Posterior probability of A* = 1 given the current component estimates.
    pA1 <- pr_A_S_1_given_X * pr_Y_given_A_star_1_X * pr_A_given_A_star_1_X * pr_Z_given_A_star_1_X
    pA0 <- pr_A_S_0_given_X * pr_Y_given_A_star_0_X * pr_A_given_A_star_0_X * pr_Z_given_A_star_0_X
    pi <- pA1 / (pA1 + pA0)

    # Smooth the posterior over X, over (Y, X), and within the A == 0 and
    # Z == 1 subsamples (the latter two evaluated at all covariate points).
    pr_A_S_1_given_X <- em_kernel_fit(em_x_all, pi, em_x_all)
    pr_A_S_0_given_X <- 1 - pr_A_S_1_given_X

    pr_A_S_1_given_Y_X <- em_kernel_fit(em_yx_all, pi, em_yx_all)
    pr_A_S_0_given_Y_X <- 1 - pr_A_S_1_given_Y_X

    pr_A_S_1_given_A_0_X <- em_kernel_fit(em_x_a0, pi[em_rows_a0], em_x_all)
    pr_A_S_0_given_A_0_X <- 1 - pr_A_S_1_given_A_0_X

    pr_A_S_1_given_Z_1_X <- em_kernel_fit(em_x_z1, pi[em_rows_z1], em_x_all)
    pr_A_S_0_given_Z_1_X <- 1 - pr_A_S_1_given_Z_1_X

    # Bayes' rule: P(V | A*, X) = P(V | X) * P(A* | V, X) / P(A* | X).
    pr_Y_given_A_star_0_X <- pr_Y_given_X * pr_A_S_0_given_Y_X / pr_A_S_0_given_X
    pr_Y_given_A_star_1_X <- pr_Y_given_X * pr_A_S_1_given_Y_X / pr_A_S_1_given_X

    # A given A* = 0: the A = 0 cell is estimated, the A = 1 cell is its
    # complement.
    pr_A_0_given_A_star_0_X <- pr_A_0_given_X * pr_A_S_0_given_A_0_X / pr_A_S_0_given_X
    pr_A_1_given_A_star_0_X <- 1 - pr_A_0_given_A_star_0_X
    pr_A_given_A_star_0_X <- ifelse(A == 1, pr_A_1_given_A_star_0_X, pr_A_0_given_A_star_0_X)

    # A given A* = 1.
    pr_A_0_given_A_star_1_X <- pr_A_0_given_X * pr_A_S_1_given_A_0_X / pr_A_S_1_given_X
    pr_A_1_given_A_star_1_X <- 1 - pr_A_0_given_A_star_1_X
    pr_A_given_A_star_1_X <- ifelse(A == 1, pr_A_1_given_A_star_1_X, pr_A_0_given_A_star_1_X)

    # Z given A* = 0: the Z = 1 cell is estimated, the Z = 0 cell is its
    # complement.
    pr_Z_1_given_A_star_0_X <- pr_Z_1_given_X * pr_A_S_0_given_Z_1_X / pr_A_S_0_given_X
    pr_Z_0_given_A_star_0_X <- 1 - pr_Z_1_given_A_star_0_X
    pr_Z_given_A_star_0_X <- ifelse(Z == 1, pr_Z_1_given_A_star_0_X, pr_Z_0_given_A_star_0_X)

    # Z given A* = 1.
    pr_Z_1_given_A_star_1_X <- (pr_Z_1_given_X * pr_A_S_1_given_Z_1_X) / pr_A_S_1_given_X
    pr_Z_0_given_A_star_1_X <- 1 - pr_Z_1_given_A_star_1_X
    pr_Z_given_A_star_1_X <- ifelse(Z == 1, pr_Z_1_given_A_star_1_X, pr_Z_0_given_A_star_1_X)

    # Convergence measure: largest absolute change in the posterior.
    t <- max(abs(pi - pi_save))
    # A missing/NaN posterior (e.g. 0 / 0 above) would otherwise abort at the
    # loop condition with "missing value where TRUE/FALSE needed".
    if (is.na(t)) {
      stop("posterior probability `pi` contains NA/NaN after sweep ", o - 1,
           "; the fixed-point iteration cannot continue", call. = FALSE)
    }
    pi_save <- pi
  }
  # ---- Smooth the final posterior over the expanded (Y, X) grid ----------
  # generate_kx() is a compiled helper that builds the evaluation points from
  # the (Y, X1, X2) columns; they are processed in 10 equally sized row
  # chunks and the fitted values stacked back together.
  # NOTE(review): nrow(kx) is assumed to be divisible by 10 -- confirm.
  x <- data[, c("Y", "X1", "X2")]
  kx <- generate_kx(data[, c("Y", "X1", "X2")])
  t <- 10
  rows_per_chunk <- nrow(kx) / t
  chunk_fits <- vector("list", t)
  for (i in seq_len(t)) {
    first_row <- (i - 1) * rows_per_chunk + 1
    last_row <- i * rows_per_chunk
    kxx <- kx[first_row:last_row, ]
    kxx <- as.data.frame(kxx)
    kernel_y <- get_pre_y(as.matrix(x), pi, as.matrix(kxx))
    colnames(kernel_y) <- c(colnames(x), "y_")
    chunk_fits[[i]] <- kernel_y
    print(i)
  }
  kernel_y_ <- do.call(rbind, chunk_fits)
  # Fitted P(A* = 1 | Y, X) at every evaluation point, and its complement.
  p1 <- unlist(kernel_y_[, ncol(kernel_y_)])
  p0 <- 1 - p1
  ##### a new try #####
  ##### a new try #####
  # ---- E[Y | A* = a, X] for every observation ----------------------------
  # Combine the stacked (normalised) outcome fits with the grid posteriors
  # p1 / p0. fast_vector_operations() is a compiled helper that takes the
  # place of the earlier R loop over vector_operation(); it returns the two
  # stacked vectors extracted below.
  pr_Y_given_X <- pr_Y_given_X_save
  ii <- cbind(pr_Y_given_X, p1, p0)
  nrow_data <- nrow(data)
  li <- fast_vector_operations(as.matrix(ii), pr_A_S_1_given_X, pr_A_S_0_given_X, nrow_data)
  pr_Y_given_A_star_1_X_all <- li$pr_Y_given_A_star_1_X_all
  pr_Y_given_A_star_0_X_all <- li$pr_Y_given_A_star_0_X_all

  # The stacked vectors are cut into consecutive blocks of nrow(data)
  # entries. Each block is rescaled to sum to one and used as weights on the
  # observed outcomes, giving one weighted mean of Y per block.
  n_blocks <- length(pr_Y_given_A_star_1_X_all) %/% nrow_data
  E_Y_given_A_star_1_X <- numeric(n_blocks)
  E_Y_given_A_star_0_X <- numeric(n_blocks)
  for (j in seq_len(n_blocks)) {
    block_rows <- ((j - 1) * nrow_data + 1):(j * nrow_data)
    o_1 <- pr_Y_given_A_star_1_X_all[block_rows]
    o_2 <- pr_Y_given_A_star_0_X_all[block_rows]
    o_1 <- o_1 / sum(o_1)
    o_2 <- o_2 / sum(o_2)
    # Missing products are dropped from the weighted sums.
    E_Y_given_A_star_1_X[j] <- sum(o_1 * Y, na.rm = TRUE)
    E_Y_given_A_star_0_X[j] <- sum(o_2 * Y, na.rm = TRUE)
  }
  # phi1<-(A-pr_A_1_given_A_star_0_X)*(Z-pr_Z_1_given_A_star_0_X)*(Y-E_Y_given_A_star_1_X)/((pr_A_1_given_A_star_1_X-pr_A_1_given_A_star_0_X)*(pr_Z_1_given_A_star_1_X-pr_Z_1_given_A_star_0_X)*(pr_A_S_1_given_X))+E_Y_given_A_star_1_X
  # # phi1<-(A-pr_A_given_A_star_0_X)*(Z-pr_Z_given_A_star_0_X)*(Y-E_Y_given_A_star_1_X)/((pr_A_given_A_star_1_X-pr_A_given_A_star_0_X)*(pr_Z_given_A_star_1_X-pr_Z_given_A_star_0_X)*(pr_A_S_1_given_X))-E_Y_given_A_star_1_X
  # 
  # phi1<-mean(phi1);phi1
  
  
  # ---- Plug-in estimates phi1 / phi0 --------------------------------------
  # Shared pieces for arm 1: a residual product and a scaling term.
  resid_arm1 <- (A - pr_A_1_given_A_star_0_X) * (Z - pr_Z_1_given_A_star_0_X) * (Y - E_Y_given_A_star_1_X)
  scale_arm1 <- ((pr_A_1_given_A_star_1_X - pr_A_1_given_A_star_0_X) * (pr_Z_1_given_A_star_1_X - pr_Z_1_given_A_star_0_X) * (pr_A_S_1_given_X))
  # Same pieces for arm 0 (roles of A* = 1 and A* = 0 swapped).
  resid_arm0 <- (A - pr_A_1_given_A_star_1_X) * (Z - pr_Z_1_given_A_star_1_X) * (Y - E_Y_given_A_star_0_X)
  scale_arm0 <- ((pr_A_1_given_A_star_0_X - pr_A_1_given_A_star_1_X) * (pr_Z_1_given_A_star_0_X - pr_Z_1_given_A_star_1_X) * (pr_A_S_0_given_X))

  # Version ".1": ratio of sample means (numerator and denominator are
  # averaged separately before dividing).
  phi1_1 <- resid_arm1 + E_Y_given_A_star_1_X * scale_arm1
  phi1_2 <- scale_arm1
  phi0_1 <- resid_arm0 + E_Y_given_A_star_0_X * scale_arm0
  phi0_2 <- scale_arm0
  phi1.1 <- mean(phi1_1) / mean(phi1_2)
  phi0.1 <- mean(phi0_1) / mean(phi0_2)

  # Plain version: sample mean of the per-observation ratio plus the
  # conditional mean.
  phi1 <- mean(resid_arm1 / scale_arm1 + E_Y_given_A_star_1_X)
  phi0 <- mean(resid_arm0 / scale_arm0 + E_Y_given_A_star_0_X)

  # Keep a copy of the simulated data frame; `data` is overwritten further
  # down.
  data_save <- data
  # save(data,A,pr_A_1_given_A_star_0_X,Z,pr_Z_1_given_A_star_0_X,
  #      Y,E_Y_given_A_star_1_X,pr_A_1_given_A_star_1_X,pr_A_1_given_A_star_0_X,
  #      pr_Z_1_given_A_star_1_X,pr_Z_1_given_A_star_0_X,pr_A_S_1_given_X,E_Y_given_A_star_1_X,
  #      E_Y_given_A_star_0_X,pr_A_S_0_given_X,E_Y_given_A_star_0_X,
  #      file='/home/qixuezhu1/R/data_save/50000_r_n_n_n_200.RData')
  
  # doc<-addtitle(paste('phi1',phi1,sep=' '),1,doc)
  # doc<-addtitle(paste('phi0',phi0,sep=' '),1,doc)
  # 
  # doc<-addtitle(paste('mean(A-pr_A_1_given_A_star_0_X)',mean(A-pr_A_1_given_A_star_0_X),sep=' '),4,doc)
  # doc<-addtitle(paste('mean(Z-pr_Z_1_given_A_star_0_X)',mean(Z-pr_Z_1_given_A_star_0_X),sep=' '),4,doc)
  # doc<-addtitle(paste('mean(Y-E_Y_given_A_star_1_X)',mean(Y-E_Y_given_A_star_1_X),sep=' '),4,doc)
  # doc<-addtitle(paste('mean(pr_A_1_given_A_star_1_X-pr_A_1_given_A_star_0_X)',mean(pr_A_1_given_A_star_1_X-pr_A_1_given_A_star_0_X),sep=' '),4,doc)
  # doc<-addtitle(paste('mean(pr_Z_1_given_A_star_1_X-pr_Z_1_given_A_star_0_X)', mean(pr_Z_1_given_A_star_1_X-pr_Z_1_given_A_star_0_X),sep=' '),4,doc)
  # doc<-addtitle(paste('mean(pr_A_S_1_given_X)',mean(pr_A_S_1_given_X),sep=' '),4,doc)
  # doc<-addtitle(paste('mean(A_)', mean(A_),sep=' '),4,doc)
  # doc<-addtitle(paste('mean(E_Y_given_A_star_1_X)',mean(E_Y_given_A_star_1_X),sep=' '),4,doc)
  # doc<-addtitle(paste('mean(pr_A_1_given_A_star_0_X)',mean(pr_A_1_given_A_star_0_X),sep=' '),4,doc)
  # doc<-addtitle(paste('mean(A[A_==0])',mean(A[A_==0]),sep=' '),4,doc)
  # doc<-addtitle(paste('mean(pr_A_1_given_A_star_1_X)',mean(pr_A_1_given_A_star_1_X),sep=' '),4,doc)
  # doc<-addtitle(paste('mean(A[A_==1])',mean(A[A_==1]),sep=' '),4,doc)
  # doc<-addtitle(paste('mean(pr_Z_1_given_A_star_0_X)',mean(pr_Z_1_given_A_star_0_X),sep=' '),4,doc)
  # doc<-addtitle(paste('mean(Z[A_==0])',mean(Z[A_==0]),sep=' '),4,doc)
  # doc<-addtitle(paste('mean(pr_Z_1_given_A_star_1_X)',mean(pr_Z_1_given_A_star_1_X),sep=' '),4,doc)
  # doc<-addtitle(paste('mean(Z[A_==1])',mean(Z[A_==1]),sep=' '),4,doc)
  # ---- Per-replication summary row ----------------------------------------
  # Estimated quantities are paired with their counterparts computed from the
  # latent A_ (available only because the data are simulated). All entries
  # are rounded to three decimals.
  data_save_ <- rbind(
    data_save_,
    round(c(
      phi0.1, phi1.1,
      phi0, phi1,
      mean(pr_A_1_given_A_star_0_X), mean(A[A_ == 0]),
      mean(pr_A_1_given_A_star_1_X), mean(A[A_ == 1]),
      mean(pr_Z_1_given_A_star_0_X), mean(Z[A_ == 0]),
      mean(pr_Z_1_given_A_star_1_X), mean(Z[A_ == 1]),
      mean(pr_A_S_1_given_X), mean(A_),
      mean(pr_A_1_given_A_star_1_X - pr_A_1_given_A_star_0_X),
      mean(A[A_ == 1]) - mean(A[A_ == 0]),
      mean(pr_Z_1_given_A_star_1_X - pr_Z_1_given_A_star_0_X),
      mean(Z[A_ == 1]) - mean(Z[A_ == 0])
    ), 3)
  )

  # ---- Per-observation pieces handed to the forests ------------------------
  numerator_1 <- (A - pr_A_1_given_A_star_0_X) * (Z - pr_Z_1_given_A_star_0_X) * (Y - E_Y_given_A_star_1_X)
  denominator_1 <- ((pr_A_1_given_A_star_1_X - pr_A_1_given_A_star_0_X) * (pr_Z_1_given_A_star_1_X - pr_Z_1_given_A_star_0_X) * (pr_A_S_1_given_X))
  add_1 <- E_Y_given_A_star_1_X
  numerator_0 <- (A - pr_A_1_given_A_star_1_X) * (Z - pr_Z_1_given_A_star_1_X) * (Y - E_Y_given_A_star_0_X)
  denominator_0 <- ((pr_A_1_given_A_star_0_X - pr_A_1_given_A_star_1_X) * (pr_Z_1_given_A_star_0_X - pr_Z_1_given_A_star_1_X) * (pr_A_S_0_given_X))
  add_0 <- E_Y_given_A_star_0_X

  # Per-observation (un-averaged) versions of phi1 / phi0 and their contrast.
  phi1 <- numerator_1 / denominator_1 + add_1
  phi0 <- numerator_0 / denominator_0 + add_0
  phi <- phi1 - phi0

  # Target the forest predictions are compared with, the covariates, and the
  # pseudo-outcome; note that Y is overwritten with the contrast here.
  truth_Y <- (X1 + X2)
  X <- data[, c("X1", "X2")]
  Y <- phi
  
  # psi<-list()
  # psi[["numerator"]]<-phi1_1*phi0_2-phi0_1*phi1_2
  # psi[["denominator"]]<-phi0_2*phi1_2
  #caculate instrument
  # W.centered <- rep(1,n)
  # ---- First forest: regress A on X to centre the "instrument" ------------
  # NOTE(review): create_train_matrices(), regression_train(), do.call.rcpp(),
  # get_legacy_seed() and validate_equalize_cluster_weights() are not defined
  # in this file. They look like grf internals, here called with extra
  # numerator_*/denominator_*/add_* arguments -- confirm which (modified)
  # package provides them and that it is loaded before this point.
  # Restore the simulated data frame and take the covariates from it.
  data<-data_save
  X<-data[,c('X1','X2')]
  # Settings for this fit. Everything listed in `args` below is passed to the
  # trainer; stabilize.splits and tune.parameters are assigned but do not
  # appear in `args`.
  num.trees= 4000
  clusters= numeric(0)
  sample.weights = NULL;equalize.cluster.weights = FALSE
  samples.per.cluster<- validate_equalize_cluster_weights(equalize.cluster.weights, clusters, sample.weights)
  sample.fraction= 0.5
  mtry= 8
  min.node.size= 6
  honesty= TRUE
  honesty.fraction= 0.5
  honesty.prune.leaves= TRUE
  alpha= 0.05
  imbalance.penalty= 0
  stabilize.splits= TRUE
  ci.group.size= 4
  compute.oob.predictions = TRUE
  num.threads= 0
  # The seed is itself drawn from R's RNG, so it consumes one random number
  # per replication.
  seed=runif(1, 0, .Machine$integer.max)
  tune.parameters = "none";
  # A<-
  # `data` is overwritten here with the training-matrix list. The outcome is
  # the observed A and the instrument is a vector of zeros.
  data <- create_train_matrices(X, outcome = A, instrument = rep(0,n),sample.weights = sample.weights,numerator_1=numerator_1,denominator_1=denominator_1,add_1=add_1,
                                numerator_0=numerator_0,denominator_0=denominator_0,add_0=add_0)
  args <- list(num.trees = num.trees,
               clusters = clusters,
               samples.per.cluster = samples.per.cluster,
               sample.fraction = sample.fraction,
               mtry = mtry,
               min.node.size = min.node.size,
               honesty = honesty,
               honesty.fraction = honesty.fraction,
               honesty.prune.leaves = honesty.prune.leaves,
               alpha = alpha,
               imbalance.penalty = imbalance.penalty,
               ci.group.size = ci.group.size,
               compute.oob.predictions = compute.oob.predictions,
               num.threads = num.threads,
               seed = seed,
               legacy.seed = get_legacy_seed())
  forest <- do.call.rcpp(regression_train, c(data, args))
  # chech<-forest$`_leaf_samples`[[1]]
  # for(oo in 1:length(chech)){
  #   print(A[chech[[oo]]])
  # }
  # summary(forest$predictions[A==0])
  # plot(forest$predictions,A)
  # Centre A by the forest predictions (out-of-bag predictions were requested
  # via compute.oob.predictions).
  W.hat <- forest$predictions
  W.centered <- A - W.hat
  # NOTE(review): the factor 1.2 is not explained anywhere in this file.
  W.centered <-W.centered*1.2
  # W.centered<-rep(1,n)
  # ---- Second forest: main fit and 95% intervals ---------------------------
  # NOTE(review): as for the first forest, the validate_*(),
  # create_train_matrices(), regression_train(), do.call.rcpp() and
  # get_legacy_seed() helpers are not defined in this file -- confirm their
  # source.
  # NOTE(review): num.trees is set to 3000 here, but `args` below passes the
  # literal 2000; mtry is set to 10 here and overwritten with 7 before `args`
  # is built. The tune.* values, all.tunable.params and default.parameters
  # are assigned but not referenced again in the visible code.
  num.trees = 3000;
  sample.weights = NULL;
  clusters = NULL;
  equalize.cluster.weights = FALSE;
  sample.fraction = 0.5;
  # mtry = min(ceiling(sqrt(ncol(X)) + 20), ncol(X));
  mtry = 10;
  
  min.node.size = 5;
  honesty = TRUE;
  honesty.fraction = 0.5;
  honesty.prune.leaves = TRUE;
  alpha = 0.05;
  imbalance.penalty = 0;
  ci.group.size = 5;
  tune.parameters = "none";
  tune.num.trees = 50;
  tune.num.reps = 100;
  tune.num.draws = 1000;
  compute.oob.predictions = TRUE;
  num.threads = NULL;
  # Fresh seed, drawn from R's RNG (consumes one random number).
  seed = runif(1, 0, .Machine$integer.max)
  # Input checks. At this point Y holds the per-observation contrast `phi`
  # assigned earlier in the loop body, not the simulated outcome.
  validate_sample_weights(sample.weights, X)
  Y <- validate_observations(Y, X)
  clusters <- validate_clusters(clusters, X)
  samples.per.cluster <- validate_equalize_cluster_weights(equalize.cluster.weights, clusters, sample.weights)
  num.threads <- validate_num_threads(num.threads)

  all.tunable.params <- c("sample.fraction", "mtry", "min.node.size", "honesty.fraction",
                          "honesty.prune.leaves", "alpha", "imbalance.penalty")
  default.parameters <- list(sample.fraction = 0.5,
                             mtry =10, # min(ceiling(sqrt(ncol(X)) + 20), ncol(X)),
                             min.node.size = 7,
                             honesty.fraction = 0.5,
                             honesty.prune.leaves = TRUE,
                             alpha = 0.05,
                             imbalance.penalty = 0)
  # data<-list()
  # Training matrices: the outcome passed is truth_Y (= X1 + X2), the
  # instrument is the centred A from the first forest, and the numerator /
  # denominator / add vectors carry the per-observation pieces of phi1 and
  # phi0. NOTE(review): how the trainer combines these is not visible here.
  data <- create_train_matrices(X, outcome = truth_Y, instrument = W.centered,sample.weights = sample.weights,numerator_1=numerator_1,denominator_1=denominator_1,add_1=add_1,
                                numerator_0=numerator_0,denominator_0=denominator_0,add_0=add_0)
  #truth_Y
  # # data$train.matrix<-as.matrix(cbind(X,Y,numerator_1,denominator_1,add_1,numerator_0,denominator_0,add_0,rep(1,n)))
  # # data[['outcome_index']]<-2
  # # data[['sample_weight_index']]<-9
  # # data[['use_sample_weights']]<-FALSE
  # # data[['numerator_1_index']]<-3
  # # data[['denominator_1_index']]<-4
  # # data[['add_1_index']]<-5
  # # data[['numerator_0_index']]<-6
  # # data[['denominator_0_index']]<-7
  # # data[['add_0_index']]<-8
  # 
  # Final mtry used for this fit (overrides the value assigned above).
  mtry=7
  args <- list(num.trees = 2000,
               clusters = clusters,
               samples.per.cluster = samples.per.cluster,
               sample.fraction = sample.fraction,
               mtry = mtry,
               min.node.size = min.node.size,
               honesty = honesty,
               honesty.fraction = honesty.fraction,
               honesty.prune.leaves = honesty.prune.leaves,
               alpha = alpha,
               imbalance.penalty = imbalance.penalty,
               ci.group.size = ci.group.size,
               compute.oob.predictions = compute.oob.predictions,
               num.threads = num.threads,
               seed = seed,
               legacy.seed = get_legacy_seed())
  forest <- do.call.rcpp(regression_train, c(data, args))
  preds = forest
  # Predictions with normal-approximation 95% limits
  # (prediction +/- 1.96 * sqrt(variance estimate)), next to the true value.
  df = data.frame(predictions = preds$predictions,
                  truth = truth_Y,
                  upper = preds$predictions + 1.96*sqrt(preds$variance.estimates),
                  lower = preds$predictions - 1.96*sqrt(preds$variance.estimates))
  truth = truth_Y
  # plot( preds$predictions,truth_Y)
  # 
  ######dn_start######
  # phi1_1<-(A-pr_A_1_given_A_star_0_X)*(Z-pr_Z_1_given_A_star_0_X)*(Y-E_Y_given_A_star_1_X)+E_Y_given_A_star_1_X*((pr_A_1_given_A_star_1_X-pr_A_1_given_A_star_0_X)*(pr_Z_1_given_A_star_1_X-pr_Z_1_given_A_star_0_X)*(pr_A_S_1_given_X))
  # phi1_2<-((pr_A_1_given_A_star_1_X-pr_A_1_given_A_star_0_X)*(pr_Z_1_given_A_star_1_X-pr_Z_1_given_A_star_0_X)*(pr_A_S_1_given_X))
  # 
  # phi0_1<-(A-pr_A_1_given_A_star_1_X)*(Z-pr_Z_1_given_A_star_1_X)*(Y-E_Y_given_A_star_0_X)+E_Y_given_A_star_0_X*((pr_A_1_given_A_star_0_X-pr_A_1_given_A_star_1_X)*(pr_Z_1_given_A_star_0_X-pr_Z_1_given_A_star_1_X)*(pr_A_S_0_given_X))
  # phi0_2<-((pr_A_1_given_A_star_0_X-pr_A_1_given_A_star_1_X)*(pr_Z_1_given_A_star_0_X-pr_Z_1_given_A_star_1_X)*(pr_A_S_0_given_X))
  # numerator_1<-(A-pr_A_1_given_A_star_0_X)*(Z-pr_Z_1_given_A_star_0_X)*(Y-E_Y_given_A_star_1_X)
  # denominator_1<-((pr_A_1_given_A_star_1_X-pr_A_1_given_A_star_0_X)*(pr_Z_1_given_A_star_1_X-pr_Z_1_given_A_star_0_X)*(pr_A_S_1_given_X))
  # add_1<-E_Y_given_A_star_1_X
  # numerator_0<-(A-pr_A_1_given_A_star_1_X)*(Z-pr_Z_1_given_A_star_1_X)*(Y-E_Y_given_A_star_0_X)
  # denominator_0<-((pr_A_1_given_A_star_0_X-pr_A_1_given_A_star_1_X)*(pr_Z_1_given_A_star_0_X-pr_Z_1_given_A_star_1_X)*(pr_A_S_0_given_X))
  # add_0<-E_Y_given_A_star_0_X
  # 
  # psi<-list()
  # psi[["numerator"]]<-phi1_1*phi0_2-phi0_1*phi1_2
  # psi[["denominator"]]<-phi0_2*phi1_2
  # 
  # # W.centered <- rep(1,n)
  # 
  # X<-data[,c('X1','X2')]
  # num.trees= 2000
  # clusters= numeric(0)
  # sample.weights = NULL;equalize.cluster.weights = FALSE
  # samples.per.cluster<- validate_equalize_cluster_weights(equalize.cluster.weights, clusters, sample.weights)
  # sample.fraction= 0.5
  # mtry= 9
  # min.node.size= 5
  # honesty= TRUE
  # honesty.fraction= 0.5
  # honesty.prune.leaves= TRUE
  # alpha= 0.05
  # imbalance.penalty= 0
  # stabilize.splits= TRUE
  # ci.group.size= 4
  # compute.oob.predictions = TRUE
  # num.threads= 0
  # seed=runif(1, 0, .Machine$integer.max)
  # tune.parameters = "none";
  # data <- create_train_matrices(X, outcome = A, sample.weights = sample.weights,numerator_1=numerator_1,denominator_1=denominator_1,add_1=add_1,
  #                               numerator_0=numerator_0,denominator_0=denominator_0,add_0=add_0)
  # args <- list(num.trees = num.trees,
  #              clusters = clusters,
  #              samples.per.cluster = samples.per.cluster,
  #              sample.fraction = sample.fraction,
  #              mtry = mtry,
  #              min.node.size = min.node.size,
  #              honesty = honesty,
  #              honesty.fraction = honesty.fraction,
  #              honesty.prune.leaves = honesty.prune.leaves,
  #              alpha = alpha,
  #              imbalance.penalty = imbalance.penalty,
  #              ci.group.size = ci.group.size,
  #              compute.oob.predictions = compute.oob.predictions,
  #              num.threads = num.threads,
  #              seed = seed,
  #              legacy.seed = get_legacy_seed())
  # forest <- do.call.rcpp(regression_train, c(data, args))
  # 
  # # forest.W <- regression_forest(X,A, num.trees = max(50, num.trees / 4),
  # #                               sample.weights = sample.weights, clusters = clusters,
  # #                               equalize.cluster.weights = equalize.cluster.weights,
  # #                               sample.fraction = sample.fraction, mtry = mtry,
  # #                               min.node.size = 5, honesty = TRUE,
  # #                               honesty.fraction = 0.5, honesty.prune.leaves = TRUE,
  # #                               alpha = alpha, imbalance.penalty = imbalance.penalty,
  # #                               ci.group.size = 1, tune.parameters = tune.parameters,
  # #                               compute.oob.predictions = TRUE,
  # #                               num.threads = num.threads, seed = seed)
  # W.hat <- forest$predictions
  # W.hat<- rep(0,n)
  # W.centered <- A - W.hat
  # D<- rep(1,n)
  # sample.weights<- NULL
  # 
  # data_ <- create_train_matrices(X,
  #                               treatment = W.centered,
  #                               survival.numerator = psi[["numerator"]],
  #                               survival.denominator = psi[["denominator"]],
  #                               censor = D,
  #                               sample.weights = sample.weights)
  # 
  # args <- list(num.trees = num.trees,
  #              clusters = clusters,
  #              samples.per.cluster = samples.per.cluster,
  #              sample.fraction = sample.fraction,
  #              mtry = mtry,
  #              min.node.size = min.node.size,
  #              honesty = honesty,
  #              honesty.fraction = honesty.fraction,
  #              honesty.prune.leaves = honesty.prune.leaves,
  #              alpha = alpha,
  #              imbalance.penalty = imbalance.penalty,
  #              stabilize.splits = stabilize.splits,
  #              ci.group.size = ci.group.size,
  #              compute.oob.predictions = compute.oob.predictions,
  #              num.threads = num.threads,
  #              seed = seed,
  #              legacy.seed = get_legacy_seed())
  # forest <- do.call.rcpp(causal_survival_train, c(data_,args))
  # truth_Y<-(X1+X2)
  # df = data.frame(predictions = forest$predictions,
  #                 truth = truth_Y,
  #                 upper = forest$predictions + 1.96*sqrt(forest$variance.estimates),
  #                 lower = forest$predictions - 1.96*sqrt(forest$variance.estimates))
  # truth = truth_Y
  #dn_end
  # plot(truth_Y,forest$predictions)
  ######coverage part#####
  percent_llf=0;avg_llf=0;
  n<-nrow(df)
  for(i in 1:n){
    xlow = ifelse(is.na(df$lower[i]),0,df$lower[i])
    xup = ifelse(is.na(df$upper[i]),0,df$upper[i])
    truthi = truth[i]
    if(xlow <= truthi && truthi <= xup){
      percent_llf = percent_llf + 1;
    }
    avg_llf = avg_llf + abs(xup - xlow)
  }
  percent_llf = percent_llf/n;percent_llf
  mse<-sum((forest$predictions-truth_Y)^2)/n;mse
  avg_llf = avg_llf/n;avg_llf
  data_cov<-rbind(data_cov,c(percent_llf,mse,avg_llf))
  print(c(percent_llf,mse,avg_llf))
  list_save[[(time_)]]<-df
  time_=time_+1
  print(time_)
  
}
# ---- Summaries for the continuous-outcome simulation ----
# Label the columns of the per-replicate summary matrix. The rows are bound
# inside the simulation loop above.
# NOTE(review): the "T_" columns presumably hold the simulated-truth
# counterpart of the estimate named just before them — confirm against the
# rbind() call that builds data_save_.
colnames(data_save_) <- c(
  "phi0.1", "phi1.1",
  "phi0", "phi1",
  "mA|A_0", "T_mA|A_0",
  "mA|A_1", "T_mA|A_1",
  "mZ|A_0", "T_mZ|A_0",
  "mZ|A_1", "T_mZ|A_1",
  "mA_1", "T_mA_1",
  "mA|A_1-mA|A_0", "T_mA|A_1-mA|A_0",
  "mZ|A_1-mZ|A_0", "T_mZ|A_1-mZ|A_0"
)
data_save_ <- as.data.frame(data_save_)
# Column-wise average over replicates (printed when run at top level).
lapply(data_save_, mean)

# One row per replicate: interval coverage, mean squared error, mean width.
colnames(data_cov) <- c("percent_llf", "mse", "avg_llf")
# Optional filter, currently disabled:
# data_cov<-data_cov[!data_cov$percent_llf<0.9,]
data_cov <- as.data.frame(data_cov)
lapply(data_cov, mean)

# Persist the raw results of this section.
save(data_save_, data_cov, list_save, file = "2000_r_n_n_n_200.RData")

# Quick visual check of one stored replicate: prediction against truth.
i <- 22
plot(list_save[[i]]$truth, list_save[[i]]$predictions)
##50000
# data_save_<-as.matrix(data_save_)
# View(list_save[[i]])
# ---- Point-wise coverage across replicates (continuous simulation) ----
# For every evaluation point t (row index shared by all replicates) compute:
#   cy_save[t] : fraction of replicates whose interval [lower, upper] covers
#                the truth at t
#   avgllf[t]  : mean interval width at t
#   sd[t]      : mean width / (2 * 1.96), i.e. the standard error implied by a
#                normal 95% interval
# Missing bounds are replaced by 0, as before. Replicates stored as empty
# entries are skipped AND left out of the denominator; previously they were
# still counted in length(list_save), which biased every average downwards.
# The computation is vectorised over points, so the per-point progress print
# of the old loop is gone.
one_t <- c()
valid_reps <- Filter(function(d) length(d) != 0, list_save)
n_reps <- length(valid_reps)
stopifnot(n_reps > 0)
n <- nrow(valid_reps[[1]])

cover_count <- numeric(n)
width_sum <- numeric(n)
for (rep_df in valid_reps) {
  # Restrict to the first n rows so that every replicate is aligned on the
  # same evaluation points.
  xlow <- rep_df$lower[seq_len(n)]
  xup <- rep_df$upper[seq_len(n)]
  truthi <- rep_df$truth[seq_len(n)]
  xlow <- ifelse(is.na(xlow), 0, xlow)
  xup <- ifelse(is.na(xup), 0, xup)
  cover_count <- cover_count + (xlow <= truthi & truthi <= xup)
  width_sum <- width_sum + xup - xlow
}

cy_save <- cover_count / n_reps
avgllf <- width_sum / n_reps
# The result vector is called `sd` for compatibility with the rest of the
# script; calls such as sd(sd) still resolve to stats::sd because R skips
# non-function objects when looking up a function name.
sd <- width_sum / (n_reps * 1.96 * 2)

sd(cy_save)
sd(avgllf)
sd(sd)
# ---- Per-replicate coverage (continuous simulation) ----
# cy_save_1 receives ONE value per non-empty replicate: the share of rows whose
# interval [lower, upper] contains the truth. Missing bounds are replaced by 0.
#
# Fixes relative to the previous version:
#  * the append to cy_save_1 used to sit inside the per-row loop, so a running
#    ratio was stored for every row instead of one coverage per replicate;
#  * the row count came from the global `data`, which is unrelated to the
#    stored replicate; it is now taken from the replicate itself.
percent_llf_1 <- 0
cy_save_1 <- c()
for (i in seq_along(list_save)) {
  percent_llf_1 <- 0
  rep_df <- list_save[[i]]
  if (length(rep_df) != 0) {
    xlow <- ifelse(is.na(rep_df$lower), 0, rep_df$lower)
    xup <- ifelse(is.na(rep_df$upper), 0, rep_df$upper)
    truthi <- rep_df$truth
    # Number of rows covered in this replicate.
    percent_llf_1 <- sum(xlow <= truthi & truthi <= xup)
    cy_save_1 <- c(cy_save_1, percent_llf_1 / nrow(rep_df))
  }
}
# Keep a copy of the summaries collected so far; data_save_ is reset to c()
# at the start of the next section.
data_save_1<-data_save_
# NOTE(review): `doc` is not created anywhere in the visible part of this
# script (the read_docx() call and library(officer) near the top are commented
# out), so this call presumably fails unless `doc` is defined elsewhere — confirm.
print(doc, target = "12.docx")

#####categorical#####
# Monte Carlo study with a binary outcome Y. time_ starts at 1 and the loop
# runs while time_ < 150, i.e. 149 replicates. Each replicate
#   1. simulates covariates X1, X2, a latent binary A_ and binary A, Z, Y that
#      are drawn conditionally on A_ and the covariates;
#   2. estimates conditional probabilities by kernel smoothing and an iterative
#      (EM-style) update of the posterior weight `pi`;
#   3. builds the pseudo-outcome phi = phi1 - phi0 and fits two forests;
#   4. stores interval coverage, MSE and mean interval width.
# Accumulators filled by the loop:
#   data_save_ : one row of rounded nuisance summaries per replicate
#   data_cov   : one row (coverage, mse, mean width) per replicate
#   list_save  : per-replicate data frame of predictions, truth and bounds
# NOTE(review): kernels() and get_pre_y() are not defined in this section —
# presumably they come from the C++ file sourced at the top of the script.
time_=1
data_save_<-c()
data_cov<-c()
list_save<-list()

while(time_<150){
  # ---- 1. data generation ----
  n<-5000
  X1<-runif(n)
  X2<-runif(n)
  
  # Latent binary variable A_ (stored as A_star in `data`).
  prop<-1/(1+exp(X1)+exp(X2))
  A_<- rbinom(n,size = 1, prob = prop)
  

  # pr(A = 1 | A_ = 1, X) and pr(A = 1 | A_ = 0, X)
  pr_A_1_given_A_1_X <- plogis(1 + 2 * X1)
  
  pr_A_1_given_A_0_X <- plogis(-1 + 0.5 * X1 - X2)
  
  # Both candidate vectors are drawn in full; ifelse() then picks per row.
  A <- ifelse(A_ == 1, 
              rbinom(n, size = 1, prob = pr_A_1_given_A_1_X), 
              rbinom(n, size = 1, prob = pr_A_1_given_A_0_X))

  # pr(Z = 1 | A_ = 1, X) and pr(Z = 1 | A_ = 0, X)
  pr_Z_1_given_A_1_X <- plogis(-1 - X1 + 0.5 * X2)
  
  
  pr_Z_1_given_A_0_X <- plogis(2 + X1)
  
  Z <- ifelse(A_ == 1, 
              rbinom(n, size = 1, prob = pr_Z_1_given_A_1_X), 
              rbinom(n, size = 1, prob = pr_Z_1_given_A_0_X))
  

  # pr(Y = 1 | A_ = 1, X) and pr(Y = 1 | A_ = 0, X); their difference is the
  # target stored later as truth_Y.
  pr_Y_1_given_A_1 <- plogis(-2 + 0.5 * X1 + X2)
  
  pr_Y_1_given_A_0 <- plogis(1 - 2 * X1 + 0.5 * X2)
  
  Y <- ifelse(A_ == 1,
              rbinom(n, size = 1, prob = pr_Y_1_given_A_1),
              rbinom(n, size = 1, prob = pr_Y_1_given_A_0))
  # sgm<-rnorm(n)
  # Y <- sin(3.1415926*X1)+(A_-0.5)*(X1+X2)+sgm
  # library(gplm)
  data <- data.frame( X1 = X1, X2 = X2, Y = Y,Z=Z,A=A,A_star = A_)
  data[,c('A','X1','X2','Y')]<-lapply(data[,c('A','X1','X2','Y')],as.numeric)
  #set 1
  # Random starting values for the iteration below. The starting values for A
  # are drawn above 0.5 under A_star = 1 and below 0.5 under A_star = 0; for Z
  # the two ranges are swapped.
  p0_A_star_1_given_X=runif(n);p0_A_star_0_given_X=1-p0_A_star_1_given_X
  p0_Y_given_A_star_1_X=runif(n);p0_Y_given_A_star_0_X=runif(n)
  p0_A_1_given_A_star_1_X=runif(n,0.5,1);p0_A_0_given_A_star_1_X=1-p0_A_1_given_A_star_1_X
  p0_A_1_given_A_star_0_X=runif(n,0,0.5);p0_A_0_given_A_star_0_X=1-p0_A_1_given_A_star_0_X
  p0_Z_1_given_A_star_1_X=runif(n,0,0.5);p0_Z_0_given_A_star_1_X=1-p0_Z_1_given_A_star_1_X
  p0_Z_1_given_A_star_0_X=runif(n,0.5,1);p0_Z_0_given_A_star_0_X=1-p0_Z_1_given_A_star_0_X
  
  # ---- 2a. marginal conditionals given X ----
  # The same recipe is applied to A, Z and Y: pass 1 - variable to kernels(),
  # scale the result by its maximum, smooth it with get_pre_y() and read the
  # fitted values from the LAST column of the returned object.
  # NOTE(review): presumably this yields kernel estimates of pr(A = 1 | X),
  # pr(Z = 1 | X) and pr(Y = 1 | X); the semantics of kernels()/get_pre_y()
  # cannot be checked from here — confirm.
  # Ksum is assigned three times below but never read inside this loop.
  kx<-x <- data[,c('X1','X2')]
  a<-data[,c('A')]
  A_one=1
  A_a<-A_one-A
  A_a<-as.data.frame(A_a)
  K <-kernels(A_a)
  Ksum <- sum(K)
  pa<-K/(max(K))#+min(K)
  kernel_y<-get_pre_y(as.matrix(x),pa,as.matrix(kx))
  colnames(kernel_y)<-c(colnames(x),'y_')
  pr_A_1_given_X<-unlist(kernel_y[,ncol(kernel_y)])#0.3556
  pr_A_0_given_X<-1-pr_A_1_given_X
  pr_A_given_X<-ifelse(A==1,pr_A_1_given_X,pr_A_0_given_X)

  z<-data[,c('Z')]
  Z_one=1
  Z_a<-Z_one-Z
  Z_a<-as.data.frame(Z_a)
  K <-kernels(Z_a)
  Ksum <- sum(K)
  pz<-K/(max(K))#+min(K)
  kernel_y<-get_pre_y(as.matrix(x),pz,as.matrix(kx))
  colnames(kernel_y)<-c(colnames(x),'y_')
  pr_Z_1_given_X<-unlist(kernel_y[,ncol(kernel_y)])
  pr_Z_0_given_X<-1-pr_Z_1_given_X
  pr_Z_given_X<-ifelse(Z==1,pr_Z_1_given_X,pr_Z_0_given_X)
  
  y=data[,c('Y')]
  Y_one=1
  Y_a<-Y_one-Y
  Y_a<-as.data.frame(Y_a)
  K <-kernels(Y_a)
  Ksum <- sum(K)
  py<-K/(max(K))#+min(K)
  kernel_y<-get_pre_y(as.matrix(x),py,as.matrix(kx))
  colnames(kernel_y)<-c(colnames(x),'y_')
  pr_Y_1_given_X<-unlist(kernel_y[,ncol(kernel_y)])
  pr_Y_0_given_X<-1-pr_Y_1_given_X
  pr_Y_given_X<-ifelse(Y==1,pr_Y_1_given_X,pr_Y_0_given_X)
  
  
    ##### #####  
  # ---- 2b. iterative update of the latent-class model ----
  # Initialise the working quantities from the random starting values, picking
  # the entry that matches each row's observed A / Z.
  pr_A_S_1_given_X=p0_A_star_1_given_X
  pr_A_S_0_given_X=p0_A_star_0_given_X
  pr_Y_given_A_star_1_X=p0_Y_given_A_star_1_X
  pr_Y_given_A_star_0_X=p0_Y_given_A_star_0_X
  pr_A_given_A_star_1_X<-ifelse(A==1,p0_A_1_given_A_star_1_X,p0_A_0_given_A_star_1_X)
  pr_Z_given_A_star_1_X<-ifelse(Z==1,p0_Z_1_given_A_star_1_X,p0_Z_0_given_A_star_1_X)
  pr_A_given_A_star_0_X<-ifelse(A==1,p0_A_1_given_A_star_0_X,p0_A_0_given_A_star_0_X)
  pr_Z_given_A_star_0_X<-ifelse(Z==1,p0_Z_1_given_A_star_0_X,p0_Z_0_given_A_star_0_X)
  pi_save<-0
  # t holds the largest absolute change of `pi` between two passes and is the
  # stopping criterion (threshold 0.001). o counts passes but is never used as
  # a cap, so the loop has no upper bound on the number of iterations.
  # Inside this script `pi` and `t` shadow base R's constant pi and function t().
  t=1;o=1
  while(t > 0.001){
    o=o+1
    # print(o)
    # Weight of each latent class: product of the current class probability
    # and the current conditionals of Y, A and Z for that class.
    pA1<-pr_A_S_1_given_X*pr_Y_given_A_star_1_X*pr_A_given_A_star_1_X*pr_Z_given_A_star_1_X
    pA0<-pr_A_S_0_given_X*pr_Y_given_A_star_0_X*pr_A_given_A_star_0_X*pr_Z_given_A_star_0_X
    
    # Normalised weight of class A_star = 1 for every row.
    pi<-pA1/(pA1+pA0)
    # print('mean(pi[A==1]);mean(pi[A==0])'); print(mean(pi[A==1])); print(mean(pi[A==0]))
    # print('mean(A_[A==1]);mean(A_[A==0])'); print(mean(A_[A==1])); print(mean(A_[A==0]))
    
    # mean(pi[A==1]);  mean(pi[A==0])#0.610757 0.3956043
    # mean(pi[Z==1]);  mean(pi[Z==0])#0.4113866 0.680706
    
    # Smooth pi over all rows -> updated pr(A_star = 1 | X).
    kx<-x <-data[,c('X1','X2')]
    kernel_y<-get_pre_y(as.matrix(x),pi,as.matrix(x))
    colnames(kernel_y)<-c(colnames(x),'y_')
    pr_A_S_1_given_X<-unlist(kernel_y[,ncol(kernel_y)])
    # View(cbind(pr_A_S_1_given_X,A_))
    pr_A_S_0_given_X=1-pr_A_S_1_given_X

    
    # The four blocks below smooth pi using only the rows of one subgroup
    # (Y == 0, Y == 1, A == 0, Z == 1) while passing ALL rows as kx.
    # NOTE(review): presumably get_pre_y(train_x, response, eval_x) evaluates
    # the subgroup fit at every row — confirm against its definition.
    x<-data[data$Y==0,c('X1','X2')]
    pi_<-pi[data$Y==0]
    kx<-data[,c('X1','X2')]
    kernel_y<-get_pre_y(as.matrix(x),pi_,as.matrix(kx))
    colnames(kernel_y)<-c(colnames(x),'y_')
    pr_A_S_1_given_Y_0_X<-unlist(kernel_y[,ncol(kernel_y)])
    pr_A_S_0_given_Y_0_X=1-pr_A_S_1_given_Y_0_X

    # The Y == 1 fit is only referenced by commented-out lines further down.
    x<-data[data$Y==1,c('X1','X2')]
    pi_<-pi[data$Y==1]
    kx<-data[,c('X1','X2')]
    kernel_y<-get_pre_y(as.matrix(x),pi_,as.matrix(kx))
    colnames(kernel_y)<-c(colnames(x),'y_')
    pr_A_S_1_given_Y_1_X<-unlist(kernel_y[,ncol(kernel_y)])
    pr_A_S_0_given_Y_1_X=1-pr_A_S_1_given_Y_1_X
    
    x<-data[data$A==0,c('X1','X2')]
    pi_<-pi[data$A==0]
    kx<-data[,c('X1','X2')]
    kernel_y<-get_pre_y(as.matrix(x),pi_,as.matrix(kx))
    colnames(kernel_y)<-c(colnames(x),'y_')
    pr_A_S_1_given_A_0_X<-unlist(kernel_y[,ncol(kernel_y)])
    pr_A_S_0_given_A_0_X=1-pr_A_S_1_given_A_0_X
    #mean(pr_A_S_1_given_A_1_X);
    # print('mean(pr_A_S_1_given_A_0_X)'); 
    # print(mean(pr_A_S_1_given_A_1_X)); 
    # print(mean(pr_A_S_1_given_A_0_X));
    #mean(pr_A_S_1_given_A_1_X);mean(pr_A_S_1_given_A_0_X)# 0.6449716 0.3829605
    
    x<-data[data$Z==1,c('X1','X2')]
    pi_<-pi[data$Z==1]
    kx<-data[,c('X1','X2')]
    kernel_y<-get_pre_y(as.matrix(x),pi_,as.matrix(kx))
    colnames(kernel_y)<-c(colnames(x),'y_')
    pr_A_S_1_given_Z_1_X<-unlist(kernel_y[,ncol(kernel_y)])
    pr_A_S_0_given_Z_1_X=1-pr_A_S_1_given_Z_1_X
    # 
# 
#     pr_Y_0_given_A_star_0_X<-pr_Y_0_given_X*pr_A_S_0_given_Y_0_X/pr_A_S_0_given_X
#     p_Y_1_given_A_star_0_X<-1-pr_Y_0_given_A_star_0_X
#     pr_Y_given_A_star_0_X<-ifelse(Y==1,pr_Y_1_given_A_star_0_X,pr_Y_0_given_A_star_0_X)
#     
#     pr_Y_0_given_A_star_1_X<-pr_Y_0_given_X*pr_A_S_1_given_Y_0_X/pr_A_S_1_given_X
#     pr_Y_1_given_A_star_1_X<-1-pr_Y_0_given_A_star_1_X
#     pr_Y_given_A_star_1_X<-ifelse(Y==1,pr_Y_1_given_A_star_1_X,pr_Y_0_given_A_star_1_X)
    

    # Bayes-rule updates of the class-conditional probabilities:
    #   pr(V = v | A_star = s, X) = pr(V = v | X) * pr(A_star = s | V = v, X)
    #                               / pr(A_star = s | X)
    # One level of each variable is computed this way (A = 0, Z = 1, Y = 0);
    # the other level is obtained as one minus it.
    # pr_A_given_A_star_0_X<-pr_A_given_X*pr_A_S_0_given_A_X/pr_A_S_0_given_X
    pr_A_0_given_A_star_0_X<-pr_A_0_given_X*pr_A_S_0_given_A_0_X/pr_A_S_0_given_X
    # pr_A_1_given_A_star_0_X<-pr_A_1_given_X*pr_A_S_0_given_A_1_X/pr_A_S_0_given_X
    # pr_A_0_given_A_star_0_X<-1-pr_A_1_given_A_star_0_X
    pr_A_1_given_A_star_0_X=1-pr_A_0_given_A_star_0_X
    pr_A_given_A_star_0_X<-ifelse(A==1,pr_A_1_given_A_star_0_X,pr_A_0_given_A_star_0_X)
    # print('mean(pr_A_1_given_A_star_0_X);mean(A[A_==0])'); print(mean(pr_A_1_given_A_star_0_X));print(mean(A[A_==0]));# 0.2672546
    
    pr_A_0_given_A_star_1_X<-pr_A_0_given_X*pr_A_S_1_given_A_0_X/pr_A_S_1_given_X
    pr_A_1_given_A_star_1_X=1-pr_A_0_given_A_star_1_X
    pr_A_given_A_star_1_X<-ifelse(A==1,pr_A_1_given_A_star_1_X,pr_A_0_given_A_star_1_X)
    # print('mean(pr_A_1_given_A_star_1_X);mean(A[A_==1])');print(mean(pr_A_1_given_A_star_1_X));print(mean(A[A_==1])) #0.4778403
    
    pr_Z_1_given_A_star_0_X<-pr_Z_1_given_X*pr_A_S_0_given_Z_1_X/pr_A_S_0_given_X
    pr_Z_0_given_A_star_0_X=1-pr_Z_1_given_A_star_0_X
    pr_Z_given_A_star_0_X<-ifelse(Z==1,pr_Z_1_given_A_star_0_X,pr_Z_0_given_A_star_0_X)
    # print('mean(pr_Z_1_given_A_star_0_X);mean(Z[A_==0])'); print(mean(pr_Z_1_given_A_star_0_X));print(mean(Z[A_==0]))#0.8643745
    
    pr_Z_1_given_A_star_1_X<-(pr_Z_1_given_X*pr_A_S_1_given_Z_1_X)/pr_A_S_1_given_X
    pr_Z_0_given_A_star_1_X<-1-pr_Z_1_given_A_star_1_X
    pr_Z_given_A_star_1_X<-ifelse(Z==1,pr_Z_1_given_A_star_1_X,pr_Z_0_given_A_star_1_X)
    # print('mean(pr_Z_1_given_A_star_1_X);mean(Z[A_==1])'); print(mean(pr_Z_1_given_A_star_1_X));print(mean(Z[A_==1]))#0.6607444

    # pr_Y_1_given_A_star_0_X<-pr_Y_1_given_X*pr_A_S_0_given_Y_1_X/pr_A_S_0_given_X
    pr_Y_0_given_A_star_0_X<-pr_Y_0_given_X*pr_A_S_0_given_Y_0_X/pr_A_S_0_given_X
    # pr_Y_0_given_A_star_0_X=1-pr_Y_1_given_A_star_0_X
    pr_Y_1_given_A_star_0_X=1-pr_Y_0_given_A_star_0_X
    pr_Y_given_A_star_0_X<-ifelse(Y==1,pr_Y_1_given_A_star_0_X,pr_Y_0_given_A_star_0_X)

    pr_Y_0_given_A_star_1_X<-pr_Y_0_given_X*pr_A_S_1_given_Y_0_X/pr_A_S_1_given_X
    # pr_Y_1_given_A_star_1_X<-pr_Y_1_given_X*pr_A_S_1_given_Y_1_X/pr_A_S_1_given_X
    # pr_Y_0_given_A_star_1_X=1-pr_Y_0_given_A_star_1_X
    pr_Y_1_given_A_star_1_X=1-pr_Y_0_given_A_star_1_X
    pr_Y_given_A_star_1_X<-ifelse(Y==1,pr_Y_1_given_A_star_1_X,pr_Y_0_given_A_star_1_X)

    # Convergence measure: largest change of pi since the previous pass
    # (pi_save is the scalar 0 on the first pass).
    t=max(abs(pi-pi_save))
    pi_save<-pi
    # print(t)
    
  }
  # ---- 3. pseudo-outcomes ----
  # Y is binary, so the conditional mean equals pr(Y = 1 | A_star, X).
  E_Y_given_A_star_1_X<-pr_Y_1_given_A_star_1_X
  E_Y_given_A_star_0_X<-pr_Y_1_given_A_star_0_X
  
  # phi1 / phi0: plug-in value plus a correction term built from the residuals
  # of A, Z and Y, divided by the product of the class contrasts and the class
  # probability.
  phi1<-(A-pr_A_1_given_A_star_0_X)*(Z-pr_Z_1_given_A_star_0_X)*(ifelse(Y==1,1,0)-E_Y_given_A_star_1_X)/((pr_A_1_given_A_star_1_X-pr_A_1_given_A_star_0_X)*(pr_Z_1_given_A_star_1_X-pr_Z_1_given_A_star_0_X)*(pr_A_S_1_given_X))+E_Y_given_A_star_1_X
  phi0<-(A-pr_A_1_given_A_star_1_X)*(Z-pr_Z_1_given_A_star_1_X)*(ifelse(Y==1,1,0)-E_Y_given_A_star_0_X)/((pr_A_1_given_A_star_0_X-pr_A_1_given_A_star_1_X)*(pr_Z_1_given_A_star_0_X-pr_Z_1_given_A_star_1_X)*(pr_A_S_0_given_X))+E_Y_given_A_star_0_X
  
  # One summary row per replicate: each estimated mean is followed by the
  # corresponding quantity computed from the simulated A_, A and Z.
  data_save_<-rbind(data_save_,c(round(mean(phi0),3),round(mean(phi1),3),
                                 round(mean(pr_A_1_given_A_star_0_X),3),round(mean(A[A_==0]),3),
                                 round(mean(pr_A_1_given_A_star_1_X),3),round(mean(A[A_==1]),3),
                                 round(mean(pr_Z_1_given_A_star_0_X),3),round(mean(Z[A_==0]),3),
                                 round(mean(pr_Z_1_given_A_star_1_X),3),round(mean(Z[A_==1]),3),
                                 round(mean(pr_A_S_1_given_X),3),round(mean(A_),3),
                                 round(mean(pr_A_1_given_A_star_1_X-pr_A_1_given_A_star_0_X),3),
                                 round(mean(A[A_==1])-mean(A[A_==0]),3),
                                 round(mean(pr_Z_1_given_A_star_1_X-pr_Z_1_given_A_star_0_X),3),
                                 round(mean(Z[A_==1])-mean(Z[A_==0]),3)))
  # The next two assignments repeat the phi1 / phi0 expressions above verbatim.
  phi1<-(A-pr_A_1_given_A_star_0_X)*(Z-pr_Z_1_given_A_star_0_X)*(ifelse(Y==1,1,0)-E_Y_given_A_star_1_X)/((pr_A_1_given_A_star_1_X-pr_A_1_given_A_star_0_X)*(pr_Z_1_given_A_star_1_X-pr_Z_1_given_A_star_0_X)*(pr_A_S_1_given_X))+E_Y_given_A_star_1_X
  phi0<-(A-pr_A_1_given_A_star_1_X)*(Z-pr_Z_1_given_A_star_1_X)*(ifelse(Y==1,1,0)-E_Y_given_A_star_0_X)/((pr_A_1_given_A_star_0_X-pr_A_1_given_A_star_1_X)*(pr_Z_1_given_A_star_0_X-pr_Z_1_given_A_star_1_X)*(pr_A_S_0_given_X))+E_Y_given_A_star_0_X
  # The same pieces kept separately; they are handed to create_train_matrices().
  numerator_1<-(A-pr_A_1_given_A_star_0_X)*(Z-pr_Z_1_given_A_star_0_X)*(Y-E_Y_given_A_star_1_X)
  denominator_1<-((pr_A_1_given_A_star_1_X-pr_A_1_given_A_star_0_X)*(pr_Z_1_given_A_star_1_X-pr_Z_1_given_A_star_0_X)*(pr_A_S_1_given_X))
  add_1<-E_Y_given_A_star_1_X
  numerator_0<-(A-pr_A_1_given_A_star_1_X)*(Z-pr_Z_1_given_A_star_1_X)*(Y-E_Y_given_A_star_0_X)
  denominator_0<-((pr_A_1_given_A_star_0_X-pr_A_1_given_A_star_1_X)*(pr_Z_1_given_A_star_0_X-pr_Z_1_given_A_star_1_X)*(pr_A_S_0_given_X))
  add_0<-E_Y_given_A_star_0_X
  phi = phi1-phi0
  # True contrast from the data-generating model, used for coverage and MSE.
  truth_Y<-(pr_Y_1_given_A_1-pr_Y_1_given_A_0)
  X<-data[,c('X1','X2')]
  # From here on Y no longer holds the binary outcome but the pseudo-outcome.
  Y<-phi

  # ---- 4a. first forest: outcome A ----
  # NOTE(review): create_train_matrices(), regression_train(), do.call.rcpp(),
  # validate_*() and get_legacy_seed() look like grf internals, but the extra
  # numerator_/denominator_/add_/outcome_t arguments are not defined in this
  # script — presumably a modified build; confirm.
  # stabilize.splits and tune.parameters are assigned but not placed in `args`.
  num.trees= 4000
  clusters= numeric(0)
  sample.weights = NULL;equalize.cluster.weights = FALSE
  samples.per.cluster<- validate_equalize_cluster_weights(equalize.cluster.weights, clusters, sample.weights)
  sample.fraction= 0.5
  mtry= 8
  min.node.size= 6
  honesty= TRUE
  honesty.fraction= 0.5
  honesty.prune.leaves= TRUE
  alpha= 0.05
  imbalance.penalty= 0
  stabilize.splits= TRUE
  ci.group.size= 4
  compute.oob.predictions = TRUE
  num.threads= 0
  # A fresh random seed is drawn for every replicate.
  seed=runif(1, 0, .Machine$integer.max)
  tune.parameters = "none";
  # A<-
  # data <- create_train_matrices(X, outcome = A, instrument = rep(0,n),sample.weights = sample.weights,numerator_1=numerator_1,denominator_1=denominator_1,add_1=add_1,
  #                               numerator_0=numerator_0,denominator_0=denominator_0,add_0=add_0)
  # `data` is overwritten here: it stops being the simulated data frame and
  # becomes the return value of create_train_matrices().
  data <- create_train_matrices(X,X, outcome = A, instrument = rep(0,n),sample.weights = sample.weights,numerator_1=numerator_1,denominator_1=denominator_1,add_1=add_1,
                                numerator_0=numerator_0,denominator_0=denominator_0,add_0=add_0,
                                outcome_t = A)
   args <- list(num.trees = num.trees,
               clusters = clusters,
               samples.per.cluster = samples.per.cluster,
               sample.fraction = sample.fraction,
               mtry = mtry,
               min.node.size = min.node.size,
               honesty = honesty,
               honesty.fraction = honesty.fraction,
               honesty.prune.leaves = honesty.prune.leaves,
               alpha = alpha,
               imbalance.penalty = imbalance.penalty,
               ci.group.size = ci.group.size,
               compute.oob.predictions = compute.oob.predictions,
               num.threads = num.threads,
               seed = seed,
               legacy.seed = get_legacy_seed())
  forest <- do.call.rcpp(regression_train, c(data, args))
  # Residual of A after removing the forest prediction, then doubled.
  W.hat <- forest$predictions
  W.centered <- A - W.hat
  W.centered <-W.centered*2
  # W.centered<-rep(1,n)
  # ---- 4b. second forest: outcome phi (now stored in Y) ----
  # Several values set below are not what the fit finally uses: num.trees is
  # set to 3000 but `args` hard-codes 4000, mtry = 10 is replaced by mtry=8
  # just before `args` is built, and the tune.* values are not passed at all.
  num.trees = 3000;
  sample.weights = NULL;
  clusters = NULL;
  equalize.cluster.weights = FALSE;
  sample.fraction = 0.5;
  # mtry = min(ceiling(sqrt(ncol(X)) + 20), ncol(X));
  mtry = 10;
  
  min.node.size = 4.5;
  honesty = TRUE;
  honesty.fraction = 0.5;
  honesty.prune.leaves = TRUE;
  alpha = 0.05;
  imbalance.penalty = 0;
  ci.group.size = 5;
  tune.parameters = "none";
  tune.num.trees = 50;
  tune.num.reps = 100;
  tune.num.draws = 1000;
  compute.oob.predictions = TRUE;
  num.threads = NULL;
  seed = runif(1, 0, .Machine$integer.max)
  validate_sample_weights(sample.weights, X)
  Y <- validate_observations(Y, X)
  clusters <- validate_clusters(clusters, X)
  samples.per.cluster <- validate_equalize_cluster_weights(equalize.cluster.weights, clusters, sample.weights)
  num.threads <- validate_num_threads(num.threads)
  # data <- create_train_matrices(X, outcome = Y, instrument = W.centered,sample.weights = sample.weights,numerator_1=numerator_1,denominator_1=denominator_1,add_1=add_1,
  #                               numerator_0=numerator_0,denominator_0=denominator_0,add_0=add_0)
  data <- create_train_matrices(X,X, outcome = Y, instrument = W.centered,sample.weights = sample.weights,numerator_1=numerator_1,denominator_1=denominator_1,add_1=add_1,
                                numerator_0=numerator_0,denominator_0=denominator_0,add_0=add_0,
                                outcome_t = Y)
  
   mtry=8
  args <- list(num.trees = 4000,
               clusters = clusters,
               samples.per.cluster = samples.per.cluster,
               sample.fraction = sample.fraction,
               mtry = mtry,
               min.node.size = min.node.size,
               honesty = honesty,
               honesty.fraction = honesty.fraction,
               honesty.prune.leaves = honesty.prune.leaves,
               alpha = alpha,
               imbalance.penalty = imbalance.penalty,
               ci.group.size = ci.group.size,
               compute.oob.predictions = compute.oob.predictions,
               num.threads = num.threads,
               seed = seed,
               legacy.seed = get_legacy_seed())
  forest <- do.call.rcpp(regression_train, c(data, args))
  preds = forest
  # Normal-approximation 95% intervals: prediction +/- 1.96 * sqrt(variance).
  df = data.frame(predictions = preds$predictions,
                  truth = truth_Y,
                  upper = preds$predictions + 1.96*sqrt(preds$variance.estimates),
                  lower = preds$predictions - 1.96*sqrt(preds$variance.estimates))
  truth = truth_Y
  
  
  # plot(truth_Y,forest$predictions)
  ######coverage part#####
  # percent_llf counts rows whose interval contains the truth; avg_llf sums the
  # interval widths. A missing bound is replaced by 0 before the comparison.
  percent_llf=0;avg_llf=0;
  n<-nrow(df)
  for(i in 1:n){
    xlow = ifelse(is.na(df$lower[i]),0,df$lower[i])
    xup = ifelse(is.na(df$upper[i]),0,df$upper[i])
    truthi = truth[i]
    if(xlow <= truthi && truthi <= xup){
      percent_llf = percent_llf + 1;
    }
    avg_llf = avg_llf + abs(xup - xlow)
  }
  # Turn the counts into a coverage rate, a mean squared error and a mean width.
  percent_llf = percent_llf/n;percent_llf
  print(paste('percent_llf'));print(percent_llf)
  mse<-sum((forest$predictions-truth_Y)^2)/n;print(mse)
  avg_llf = avg_llf/n;print(avg_llf)
  # Store this replicate's results and move on to the next one.
  data_cov<-rbind(data_cov,c(percent_llf,mse,avg_llf))
  list_save[[(time_)]]<-df
  time_=time_+1
  print(time_)
  
}
# ---- Summaries for the binary-outcome simulation ----
# Column labels follow the order of the values bound per replicate in the
# simulation loop: each estimated mean is followed by its "T_" counterpart
# computed from the simulated data.
colnames(data_save_) <- c(
  "phi0", "phi1",
  "mA|A_0", "T_mA|A_0",
  "mA|A_1", "T_mA|A_1",
  "mZ|A_0", "T_mZ|A_0",
  "mZ|A_1", "T_mZ|A_1",
  "mA_1", "T_mA_1",
  "mA|A_1-mA|A_0", "T_mA|A_1-mA|A_0",
  "mZ|A_1-mZ|A_0", "T_mZ|A_1-mZ|A_0"
)
data_save_ <- as.data.frame(data_save_)
# Column-wise average over replicates (printed when run at top level).
lapply(data_save_, mean)

# One row per replicate: interval coverage, mean squared error, mean width.
colnames(data_cov) <- c("percent_llf", "mse", "avg_llf")
data_cov <- as.data.frame(data_cov)
lapply(data_cov, mean)

# Persist the raw results of this section.
save(data_save_, data_cov, list_save, file = "c_2000_n_1.RData")
# data_cov<-as.matrix(data_cov)

# ---- Point-wise coverage across replicates (binary-outcome simulation) ----
# For every evaluation point t (row index shared by all replicates) compute:
#   cy_save[t] : percentage (0-100) of replicates whose interval
#                [lower, upper] covers the truth at t
#   avgllf[t]  : mean interval width at t
#   sd[t]      : 100 * mean width / (2 * 1.96), i.e. the standard error implied
#                by a normal 95% interval, on the same x100 scale as cy_save
# Missing bounds are replaced by 0, as before. Replicates stored as empty
# entries are skipped AND left out of the denominator; previously they were
# still counted in length(list_save), which biased every average downwards.
# The computation is vectorised over points, so the per-point progress print
# of the old loop is gone.
one_t <- c()
valid_reps <- Filter(function(d) length(d) != 0, list_save)
n_reps <- length(valid_reps)
stopifnot(n_reps > 0)
n <- nrow(valid_reps[[1]])

cover_count <- numeric(n)
width_sum <- numeric(n)
for (rep_df in valid_reps) {
  # Restrict to the first n rows so that every replicate is aligned on the
  # same evaluation points.
  xlow <- rep_df$lower[seq_len(n)]
  xup <- rep_df$upper[seq_len(n)]
  truthi <- rep_df$truth[seq_len(n)]
  xlow <- ifelse(is.na(xlow), 0, xlow)
  xup <- ifelse(is.na(xup), 0, xup)
  cover_count <- cover_count + (xlow <= truthi & truthi <= xup)
  width_sum <- width_sum + xup - xlow
}

cy_save <- cover_count * 100 / n_reps
avgllf <- width_sum / n_reps
# The result vector is called `sd` for compatibility with the rest of the
# script; calls such as sd(sd) still resolve to stats::sd because R skips
# non-function objects when looking up a function name.
sd <- width_sum * 100 / (n_reps * 1.96 * 2)

mean(cy_save)
sd(cy_save)
mean(sd)
sd(sd)

# Mean of the true effect within each stored replicate, then the grand mean
# over replicates. Empty entries of list_save are ignored.
# Replaces a loop that grew the vector element by element, iterated with
# 1:length() and reset an unrelated counter on every pass.
truth_save <- vapply(
  Filter(function(d) length(d) != 0, list_save),
  function(d) mean(d$truth),
  numeric(1),
  USE.NAMES = FALSE
)
mean(truth_save)

# ---- Per-replicate coverage (binary-outcome simulation) ----
# cy_save_1 receives ONE value per non-empty replicate: the share of rows whose
# interval [lower, upper] contains the truth. Missing bounds are replaced by 0.
#
# Fixes relative to the previous version:
#  * the append to cy_save_1 used to sit inside the per-row loop, so a running
#    ratio was stored for every row instead of one coverage per replicate;
#  * the row count came from the global `data`, which by this point holds the
#    result of create_train_matrices() rather than the replicate's data frame;
#    it is now taken from the replicate itself.
percent_llf_1 <- 0
cy_save_1 <- c()
for (i in seq_along(list_save)) {
  percent_llf_1 <- 0
  rep_df <- list_save[[i]]
  if (length(rep_df) != 0) {
    xlow <- ifelse(is.na(rep_df$lower), 0, rep_df$lower)
    xup <- ifelse(is.na(rep_df$upper), 0, rep_df$upper)
    truthi <- rep_df$truth
    # Number of rows covered in this replicate.
    percent_llf_1 <- sum(xlow <= truthi & truthi <= xup)
    cy_save_1 <- c(cy_save_1, percent_llf_1 / nrow(rep_df))
  }
}