# K <-kernels(Y_y)
# Ksum <- sum(K)
# kxx<-K/max(K)
kxx<-ifelse(Y_y==0,1,0)
results <-get_pre_y(as.matrix(x),kxx,as.matrix(kx))
colnames(kernel_y)<-c(colnames(x),'y_')
# final <- do.call('c',results)#整合结果
results_<-c()
results_<-rbind(results_,results)
pr_Y_given_X<-c(pr_Y_given_X,unlist(results_[,ncol(results_)]))
}
pr_Y_given_X_save<-pr_Y_given_X
pp<-c()
for(i in 1:nrow(data)){
# print(sum(pr_Y_given_X_save[seq(i,length(pr_Y_given_X_save),nrow(data))]))
pp<-c(pp,sum(pr_Y_given_X_save[seq(i,length(pr_Y_given_X_save),nrow(data))]))
# pr_Y_given_X_save[seq(i,length(pr_Y_given_X_save),nrow(data))]<-pr_Y_given_X_save[seq(i,length(pr_Y_given_X_save),nrow(data))]/sum(pr_Y_given_X_save[seq(i,length(pr_Y_given_X_save),nrow(data))])
}
pr_Y_given_X_save<-pr_Y_given_X_save/mean(pp)
pr_Y_given_X__<-c()
for(x_estimate in 1:length(Y)){
pr_Y_given_X_<-pr_Y_given_X_save[((x_estimate-1)*n+1):((x_estimate)*n)]
pr_Y_given_X_<-pr_Y_given_X_[x_estimate]
# print(pr_Y_given_X_);print(max(pr_Y_given_X_))
pr_Y_given_X__<-c(pr_Y_given_X__,pr_Y_given_X_)
}
pr_Y_given_X<-pr_Y_given_X__
##### #####
pr_A_S_1_given_X=p0_A_star_1_given_X
pr_A_S_0_given_X=p0_A_star_0_given_X
pr_Y_given_A_star_1_X=p0_Y_given_A_star_1_X
pr_Y_given_A_star_0_X=p0_Y_given_A_star_0_X
pr_A_given_A_star_1_X<-ifelse(A==1,p0_A_1_given_A_star_1_X,p0_A_0_given_A_star_1_X)
pr_Z_given_A_star_1_X<-ifelse(Z==1,p0_Z_1_given_A_star_1_X,p0_Z_0_given_A_star_1_X)
pr_A_given_A_star_0_X<-ifelse(A==1,p0_A_1_given_A_star_0_X,p0_A_0_given_A_star_0_X)
pr_Z_given_A_star_0_X<-ifelse(Z==1,p0_Z_1_given_A_star_0_X,p0_Z_0_given_A_star_0_X)
pi_save<-0
t=1;o=1
while(t > 0.001){
o=o+1
# print(o)
pA1<-pr_A_S_1_given_X*pr_Y_given_A_star_1_X*pr_A_given_A_star_1_X*pr_Z_given_A_star_1_X
pA0<-pr_A_S_0_given_X*pr_Y_given_A_star_0_X*pr_A_given_A_star_0_X*pr_Z_given_A_star_0_X
pi<-pA1/(pA1+pA0)
# print('mean(pi[A==1]);mean(pi[A==0])'); print(mean(pi[A==1])); print(mean(pi[A==0]))
# print('mean(A_[A==1]);mean(A_[A==0])'); print(mean(A_[A==1])); print(mean(A_[A==0]))
# mean(pi[A==1]);  mean(pi[A==0])#0.610757 0.3956043
# mean(pi[Z==1]);  mean(pi[Z==0])#0.4113866 0.680706
kx<-x <-data[,c('X1','X2')]
kernel_y<-get_pre_y(as.matrix(x),pi,as.matrix(x))
colnames(kernel_y)<-c(colnames(x),'y_')
pr_A_S_1_given_X<-unlist(kernel_y[,ncol(kernel_y)])
# View(cbind(pr_A_S_1_given_X,A_))
pr_A_S_0_given_X=1-pr_A_S_1_given_X
kx<-x<-data[,c('Y','X1','X2')]
kernel_y<-get_pre_y(as.matrix(x),pi,as.matrix(x))
colnames(kernel_y)<-c(colnames(x),'y_')
pr_A_S_1_given_Y_X<-unlist(kernel_y[,ncol(kernel_y)])
# View(cbind(pr_A_S_1_given_Y_X,A_))
pr_A_S_0_given_Y_X=1-pr_A_S_1_given_Y_X
# x<-data[data$A==1,c('X1','X2')]
# pi_<-pi[data$A==1]
# kx<-data[,c('X1','X2')]
# kernel_y<-get_pre_y(as.matrix(x),pi_,as.matrix(kx))
# colnames(kernel_y)<-c(colnames(x),'y_')
# pr_A_S_1_given_A_1_X<-unlist(kernel_y[,ncol(kernel_y)])
# pr_A_S_0_given_A_1_X=1-pr_A_S_1_given_A_1_X
x<-data[data$A==0,c('X1','X2')]
pi_<-pi[data$A==0]
kx<-data[,c('X1','X2')]
kernel_y<-get_pre_y(as.matrix(x),pi_,as.matrix(kx))
colnames(kernel_y)<-c(colnames(x),'y_')
pr_A_S_1_given_A_0_X<-unlist(kernel_y[,ncol(kernel_y)])
pr_A_S_0_given_A_0_X=1-pr_A_S_1_given_A_0_X
#mean(pr_A_S_1_given_A_1_X);
# print('mean(pr_A_S_1_given_A_0_X)');
# # print(mean(pr_A_S_1_given_A_1_X));
# print(mean(pr_A_S_1_given_A_0_X));
#mean(pr_A_S_1_given_A_1_X);mean(pr_A_S_1_given_A_0_X)# 0.6449716 0.3829605
x<-data[data$Z==1,c('X1','X2')]
pi_<-pi[data$Z==1]
kx<-data[,c('X1','X2')]
kernel_y<-get_pre_y(as.matrix(x),pi_,as.matrix(kx))
colnames(kernel_y)<-c(colnames(x),'y_')
pr_A_S_1_given_Z_1_X<-unlist(kernel_y[,ncol(kernel_y)])
pr_A_S_0_given_Z_1_X=1-pr_A_S_1_given_Z_1_X
#
# x<-data[data$Z==0,c('X1','X2')]
# pi_<-pi[data$Z==0]
# kx<-data[,c('X1','X2')]
# kernel_y<-get_pre_y(as.matrix(x),pi_,as.matrix(kx))
# colnames(kernel_y)<-c(colnames(x),'y_')
# pr_A_S_1_given_Z_0_X<-unlist(kernel_y[,ncol(kernel_y)])
# pr_A_S_0_given_Z_0_X=1-pr_A_S_1_given_Z_0_X
# mean(pr_A_S_1_given_Z_1_X);mean(pr_A_S_1_given_Z_0_X)#0.3971092 0.7164054
# pr_A_S_0_given_A_X=ifelse(A==1,pr_A_S_0_given_A_1_X,pr_A_S_0_given_A_0_X)
# # pr_A_S_1_given_A_X=1-pr_A_S_0_given_A_X
# pr_A_S_1_given_A_X=ifelse(A==1,pr_A_S_1_given_A_1_X,pr_A_S_1_given_A_0_X)
# pr_A_S_0_given_Z_X=ifelse(Z==1,pr_A_S_0_given_Z_1_X,pr_A_S_0_given_Z_0_X)
# # pr_A_S_1_given_Z_X=1-pr_A_S_0_given_Z_X
# pr_A_S_1_given_Z_X=ifelse(Z==1,pr_A_S_1_given_Z_1_X,pr_A_S_1_given_Z_0_X)
pr_Y_given_A_star_0_X<-pr_Y_given_X*pr_A_S_0_given_Y_X/pr_A_S_0_given_X
pr_Y_given_A_star_1_X<-pr_Y_given_X*pr_A_S_1_given_Y_X/pr_A_S_1_given_X
# pr_A_given_A_star_0_X<-pr_A_given_X*pr_A_S_0_given_A_X/pr_A_S_0_given_X
pr_A_0_given_A_star_0_X<-pr_A_0_given_X*pr_A_S_0_given_A_0_X/pr_A_S_0_given_X
# pr_A_1_given_A_star_0_X<-pr_A_1_given_X*pr_A_S_0_given_A_1_X/pr_A_S_0_given_X
# pr_A_0_given_A_star_0_X<-1-pr_A_1_given_A_star_0_X
pr_A_1_given_A_star_0_X=1-pr_A_0_given_A_star_0_X
pr_A_given_A_star_0_X<-ifelse(A==1,pr_A_1_given_A_star_0_X,pr_A_0_given_A_star_0_X)
# print('mean(pr_A_1_given_A_star_0_X);mean(A[A_==0])'); print(mean(pr_A_1_given_A_star_0_X));print(mean(A[A_==0]));# 0.2672546
# pr_A_given_A_star_1_X<-pr_A_given_X*pr_A_S_1_given_A_X/pr_A_S_1_given_X
# pr_A_1_given_A_star_1_X<-pr_A_1_given_X*pr_A_S_1_given_A_1_X/pr_A_S_1_given_X
pr_A_0_given_A_star_1_X<-pr_A_0_given_X*pr_A_S_1_given_A_0_X/pr_A_S_1_given_X
pr_A_1_given_A_star_1_X=1-pr_A_0_given_A_star_1_X
pr_A_given_A_star_1_X<-ifelse(A==1,pr_A_1_given_A_star_1_X,pr_A_0_given_A_star_1_X)
# print('mean(pr_A_1_given_A_star_1_X);mean(A[A_==1])');print(mean(pr_A_1_given_A_star_1_X));print(mean(A[A_==1])) #0.4778403
# pr_Z_given_A_star_0_X<-pr_Z_given_X*pr_A_S_0_given_Z_X/pr_A_S_0_given_X
pr_Z_1_given_A_star_0_X<-pr_Z_1_given_X*pr_A_S_0_given_Z_1_X/pr_A_S_0_given_X
# pr_Z_0_given_A_star_0_X<-(pr_Z_0_given_X*pr_A_S_0_given_Z_0_X)/pr_A_S_0_given_X
pr_Z_0_given_A_star_0_X=1-pr_Z_1_given_A_star_0_X
pr_Z_given_A_star_0_X<-ifelse(Z==1,pr_Z_1_given_A_star_0_X,pr_Z_0_given_A_star_0_X)
# print('mean(pr_Z_1_given_A_star_0_X);mean(Z[A_==0])'); print(mean(pr_Z_1_given_A_star_0_X));print(mean(Z[A_==0]))#0.8643745
# pr_Z_given_A_star_1_X<-pr_Z_given_X*pr_A_S_1_given_Z_X/pr_A_S_1_given_X
pr_Z_1_given_A_star_1_X<-(pr_Z_1_given_X*pr_A_S_1_given_Z_1_X)/pr_A_S_1_given_X
# pr_Z_0_given_A_star_1_X<-(pr_Z_0_given_X*pr_A_S_1_given_Z_0_X)/pr_A_S_1_given_X
# pr_Z_1_given_A_star_1_X<-1-pr_Z_0_given_A_star_1_X
pr_Z_0_given_A_star_1_X<-1-pr_Z_1_given_A_star_1_X
pr_Z_given_A_star_1_X<-ifelse(Z==1,pr_Z_1_given_A_star_1_X,pr_Z_0_given_A_star_1_X)
#
# print('mean(pr_Z_1_given_A_star_1_X);mean(Z[A_==1])'); print(mean(pr_Z_1_given_A_star_1_X));print(mean(Z[A_==1]))#0.6607444
# pr_Z_0_given_A_star_1_X<-pr_Z_0_given_X*pr_A_S_1_given_Z_0_X/pr_A_S_1_given_X
# View(cbind(ifelse(Z==1,pr_Z_1_given_A_star_1_X,pr_Z_0_given_A_star_1_X),pr_Z_given_A_star_1_X))
t=max(abs(pi-pi_save))
pi_save<-pi
# print(t)
}
kx<-c()
kx<-generate_kx(data[,c('Y','X1','X2')])
# for(i in 1:nrow(data[,c('Y','X1','X2')])){
#   kx<-rbind(kx,cbind(data[rep(i,each=nrow(data)),c('X1','X2')],rep(data$Y)))
# }
x<-data[,c('Y','X1','X2')]
t<-10
kernel_y_<-c()
for(i in 1:t){
kxx<-kx[((i-1)*(nrow(kx)/t)+1):((i)*(nrow(kx)/t)),]
kxx<-as.data.frame(kxx)
kernel_y<-get_pre_y(as.matrix(x),pi,as.matrix(kxx))
colnames(kernel_y)<-c(colnames(x),'y_')
kernel_y_<-rbind(kernel_y_,kernel_y)
print(i)
}
p1<-unlist(kernel_y_[,ncol(kernel_y_)])
p0=1-p1
##### a new try #####
pr_Y_given_X<-pr_Y_given_X_save
# }
ii<-cbind(pr_Y_given_X,p1,p0)
# pr_Y_given_A_star_1_X_all<-c()
# pr_Y_given_A_star_0_X_all<-c()
#
# for(j in 1:(nrow(ii)/nrow(data))){
#   o_1<-ii[((j-1)*nrow(data)+1):(j*nrow(data)),1]
#   o_2<-ii[((j-1)*nrow(data)+1):(j*nrow(data)),2]
#   o_4<-ii[((j-1)*nrow(data)+1):(j*nrow(data)),3]
#
#   o_3<-pr_A_S_1_given_X
#   o_5<-pr_A_S_0_given_X
#   pr_Y_given_A_star_1_X_all<-c(pr_Y_given_A_star_1_X_all,vector_operation(o_1,o_2,o_3))
#   pr_Y_given_A_star_0_X_all<-c(pr_Y_given_A_star_0_X_all,vector_operation(o_1,o_4,o_5))
#   if(j %% 200==0) print(j)
# }
nrow_data<-nrow(data)
li<-fast_vector_operations(as.matrix(ii),pr_A_S_1_given_X,pr_A_S_0_given_X,nrow_data)
pr_Y_given_A_star_1_X_all<-li$pr_Y_given_A_star_1_X_all
pr_Y_given_A_star_0_X_all<-li$pr_Y_given_A_star_0_X_all
E_Y_given_A_star_1_X<-c();E_Y_given_A_star_0_X<-c()
for(j in 1:(length(pr_Y_given_A_star_1_X_all)/nrow(data))){
o_1<-pr_Y_given_A_star_1_X_all[((j-1)*nrow(data)+1):(j*nrow(data))]
o_2<-pr_Y_given_A_star_0_X_all[((j-1)*nrow(data)+1):(j*nrow(data))]
o_1<-o_1/sum(o_1)
o_2<-o_2/sum(o_2)
E_Y_given_A_star_1_X<-c(E_Y_given_A_star_1_X,sum(o_1*Y,na.rm = T))
E_Y_given_A_star_0_X<-c(E_Y_given_A_star_0_X,sum(o_2*Y,na.rm = T))
}
# phi1<-(A-pr_A_1_given_A_star_0_X)*(Z-pr_Z_1_given_A_star_0_X)*(Y-E_Y_given_A_star_1_X)/((pr_A_1_given_A_star_1_X-pr_A_1_given_A_star_0_X)*(pr_Z_1_given_A_star_1_X-pr_Z_1_given_A_star_0_X)*(pr_A_S_1_given_X))+E_Y_given_A_star_1_X
# # phi1<-(A-pr_A_given_A_star_0_X)*(Z-pr_Z_given_A_star_0_X)*(Y-E_Y_given_A_star_1_X)/((pr_A_given_A_star_1_X-pr_A_given_A_star_0_X)*(pr_Z_given_A_star_1_X-pr_Z_given_A_star_0_X)*(pr_A_S_1_given_X))-E_Y_given_A_star_1_X
#
# phi1<-mean(phi1);phi1
phi1_1<-(A-pr_A_1_given_A_star_0_X)*(Z-pr_Z_1_given_A_star_0_X)*(Y-E_Y_given_A_star_1_X)+E_Y_given_A_star_1_X*((pr_A_1_given_A_star_1_X-pr_A_1_given_A_star_0_X)*(pr_Z_1_given_A_star_1_X-pr_Z_1_given_A_star_0_X)*(pr_A_S_1_given_X))
phi1_2<-((pr_A_1_given_A_star_1_X-pr_A_1_given_A_star_0_X)*(pr_Z_1_given_A_star_1_X-pr_Z_1_given_A_star_0_X)*(pr_A_S_1_given_X))
# phi1<-(A-pr_A_given_A_star_0_X)*(Z-pr_Z_given_A_star_0_X)*(Y-E_Y_given_A_star_1_X)/((pr_A_given_A_star_1_X-pr_A_given_A_star_0_X)*(pr_Z_given_A_star_1_X-pr_Z_given_A_star_0_X)*(pr_A_S_1_given_X))-E_Y_given_A_star_1_X
phi1<-mean(phi1_1)/mean(phi1_2);phi1
# phi0<-(A-pr_A_1_given_A_star_1_X)*(Z-pr_Z_1_given_A_star_1_X)*(Y-E_Y_given_A_star_0_X)/((pr_A_1_given_A_star_0_X-pr_A_1_given_A_star_1_X)*(pr_Z_1_given_A_star_0_X-pr_Z_1_given_A_star_1_X)*(pr_A_S_0_given_X))+E_Y_given_A_star_0_X
# phi0<-mean(phi0);phi0
phi0_1<-(A-pr_A_1_given_A_star_1_X)*(Z-pr_Z_1_given_A_star_1_X)*(Y-E_Y_given_A_star_0_X)+E_Y_given_A_star_0_X*((pr_A_1_given_A_star_0_X-pr_A_1_given_A_star_1_X)*(pr_Z_1_given_A_star_0_X-pr_Z_1_given_A_star_1_X)*(pr_A_S_0_given_X))
phi0_2<-((pr_A_1_given_A_star_0_X-pr_A_1_given_A_star_1_X)*(pr_Z_1_given_A_star_0_X-pr_Z_1_given_A_star_1_X)*(pr_A_S_0_given_X))
phi0<-mean(phi0_1)/mean(phi0_2);phi0
# doc<-addtitle(paste('phi1',phi1,sep=' '),1,doc)
# doc<-addtitle(paste('phi0',phi0,sep=' '),1,doc)
phi1.1<-phi1;phi0.1<-phi0
phi1<-(A-pr_A_1_given_A_star_0_X)*(Z-pr_Z_1_given_A_star_0_X)*(Y-E_Y_given_A_star_1_X)/((pr_A_1_given_A_star_1_X-pr_A_1_given_A_star_0_X)*(pr_Z_1_given_A_star_1_X-pr_Z_1_given_A_star_0_X)*(pr_A_S_1_given_X))+E_Y_given_A_star_1_X
phi1<-mean(phi1);phi1
phi0<-(A-pr_A_1_given_A_star_1_X)*(Z-pr_Z_1_given_A_star_1_X)*(Y-E_Y_given_A_star_0_X)/((pr_A_1_given_A_star_0_X-pr_A_1_given_A_star_1_X)*(pr_Z_1_given_A_star_0_X-pr_Z_1_given_A_star_1_X)*(pr_A_S_0_given_X))+E_Y_given_A_star_0_X
phi0<-mean(phi0);phi0
data_save<-data
# save(data,A,pr_A_1_given_A_star_0_X,Z,pr_Z_1_given_A_star_0_X,
#      Y,E_Y_given_A_star_1_X,pr_A_1_given_A_star_1_X,pr_A_1_given_A_star_0_X,
#      pr_Z_1_given_A_star_1_X,pr_Z_1_given_A_star_0_X,pr_A_S_1_given_X,E_Y_given_A_star_1_X,
#      E_Y_given_A_star_0_X,pr_A_S_0_given_X,E_Y_given_A_star_0_X,
#      file='/home/qixuezhu1/R/data_save/50000_r_n_n_n_200.RData')
# doc<-addtitle(paste('phi1',phi1,sep=' '),1,doc)
# doc<-addtitle(paste('phi0',phi0,sep=' '),1,doc)
#
# doc<-addtitle(paste('mean(A-pr_A_1_given_A_star_0_X)',mean(A-pr_A_1_given_A_star_0_X),sep=' '),4,doc)
# doc<-addtitle(paste('mean(Z-pr_Z_1_given_A_star_0_X)',mean(Z-pr_Z_1_given_A_star_0_X),sep=' '),4,doc)
# doc<-addtitle(paste('mean(Y-E_Y_given_A_star_1_X)',mean(Y-E_Y_given_A_star_1_X),sep=' '),4,doc)
# doc<-addtitle(paste('mean(pr_A_1_given_A_star_1_X-pr_A_1_given_A_star_0_X)',mean(pr_A_1_given_A_star_1_X-pr_A_1_given_A_star_0_X),sep=' '),4,doc)
# doc<-addtitle(paste('mean(pr_Z_1_given_A_star_1_X-pr_Z_1_given_A_star_0_X)', mean(pr_Z_1_given_A_star_1_X-pr_Z_1_given_A_star_0_X),sep=' '),4,doc)
# doc<-addtitle(paste('mean(pr_A_S_1_given_X)',mean(pr_A_S_1_given_X),sep=' '),4,doc)
# doc<-addtitle(paste('mean(A_)', mean(A_),sep=' '),4,doc)
# doc<-addtitle(paste('mean(E_Y_given_A_star_1_X)',mean(E_Y_given_A_star_1_X),sep=' '),4,doc)
# doc<-addtitle(paste('mean(pr_A_1_given_A_star_0_X)',mean(pr_A_1_given_A_star_0_X),sep=' '),4,doc)
# doc<-addtitle(paste('mean(A[A_==0])',mean(A[A_==0]),sep=' '),4,doc)
# doc<-addtitle(paste('mean(pr_A_1_given_A_star_1_X)',mean(pr_A_1_given_A_star_1_X),sep=' '),4,doc)
# doc<-addtitle(paste('mean(A[A_==1])',mean(A[A_==1]),sep=' '),4,doc)
# doc<-addtitle(paste('mean(pr_Z_1_given_A_star_0_X)',mean(pr_Z_1_given_A_star_0_X),sep=' '),4,doc)
# doc<-addtitle(paste('mean(Z[A_==0])',mean(Z[A_==0]),sep=' '),4,doc)
# doc<-addtitle(paste('mean(pr_Z_1_given_A_star_1_X)',mean(pr_Z_1_given_A_star_1_X),sep=' '),4,doc)
# doc<-addtitle(paste('mean(Z[A_==1])',mean(Z[A_==1]),sep=' '),4,doc)
data_save_<-rbind(data_save_,c(round(phi0.1,3),round(phi1.1,3),
round(phi0,3),round(phi1,3),
round(mean(pr_A_1_given_A_star_0_X),3),round(mean(A[A_==0]),3),
round(mean(pr_A_1_given_A_star_1_X),3),round(mean(A[A_==1]),3),
round(mean(pr_Z_1_given_A_star_0_X),3),round(mean(Z[A_==0]),3),
round(mean(pr_Z_1_given_A_star_1_X),3),round(mean(Z[A_==1]),3),
round(mean(pr_A_S_1_given_X),3),round(mean(A_),3),
round(mean(pr_A_1_given_A_star_1_X-pr_A_1_given_A_star_0_X),3),
round(mean(A[A_==1])-mean(A[A_==0]),3),
round(mean(pr_Z_1_given_A_star_1_X-pr_Z_1_given_A_star_0_X),3),
round(mean(Z[A_==1])-mean(Z[A_==0]),3)))
phi1<-(A-pr_A_1_given_A_star_0_X)*(Z-pr_Z_1_given_A_star_0_X)*(Y-E_Y_given_A_star_1_X)/((pr_A_1_given_A_star_1_X-pr_A_1_given_A_star_0_X)*(pr_Z_1_given_A_star_1_X-pr_Z_1_given_A_star_0_X)*(pr_A_S_1_given_X))+E_Y_given_A_star_1_X
phi0<-(A-pr_A_1_given_A_star_1_X)*(Z-pr_Z_1_given_A_star_1_X)*(Y-E_Y_given_A_star_0_X)/((pr_A_1_given_A_star_0_X-pr_A_1_given_A_star_1_X)*(pr_Z_1_given_A_star_0_X-pr_Z_1_given_A_star_1_X)*(pr_A_S_0_given_X))+E_Y_given_A_star_0_X
numerator_1<-(A-pr_A_1_given_A_star_0_X)*(Z-pr_Z_1_given_A_star_0_X)*(Y-E_Y_given_A_star_1_X)
denominator_1<-((pr_A_1_given_A_star_1_X-pr_A_1_given_A_star_0_X)*(pr_Z_1_given_A_star_1_X-pr_Z_1_given_A_star_0_X)*(pr_A_S_1_given_X))
add_1<-E_Y_given_A_star_1_X
numerator_0<-(A-pr_A_1_given_A_star_1_X)*(Z-pr_Z_1_given_A_star_1_X)*(Y-E_Y_given_A_star_0_X)
denominator_0<-((pr_A_1_given_A_star_0_X-pr_A_1_given_A_star_1_X)*(pr_Z_1_given_A_star_0_X-pr_Z_1_given_A_star_1_X)*(pr_A_S_0_given_X))
add_0<-E_Y_given_A_star_0_X
phi = phi1-phi0
truth_Y<-(X1+X2)
X=data[,c('X1','X2')]
Y<-phi
# psi<-list()
# psi[["numerator"]]<-phi1_1*phi0_2-phi0_1*phi1_2
# psi[["denominator"]]<-phi0_2*phi1_2
#caculate instrument
# W.centered <- rep(1,n)
data<-data_save
X<-data[,c('X1','X2')]
num.trees= 4000
clusters= numeric(0)
sample.weights = NULL;equalize.cluster.weights = FALSE
samples.per.cluster<- validate_equalize_cluster_weights(equalize.cluster.weights, clusters, sample.weights)
sample.fraction= 0.5
mtry= 8
min.node.size= 6
honesty= TRUE
honesty.fraction= 0.5
honesty.prune.leaves= TRUE
alpha= 0.05
imbalance.penalty= 0
stabilize.splits= TRUE
ci.group.size= 4
compute.oob.predictions = TRUE
num.threads= 0
seed=runif(1, 0, .Machine$integer.max)
tune.parameters = "none";
# A<-
data <- create_train_matrices(X, outcome = A, instrument = rep(0,n),sample.weights = sample.weights,numerator_1=numerator_1,denominator_1=denominator_1,add_1=add_1,
numerator_0=numerator_0,denominator_0=denominator_0,add_0=add_0)
args <- list(num.trees = num.trees,
clusters = clusters,
samples.per.cluster = samples.per.cluster,
sample.fraction = sample.fraction,
mtry = mtry,
min.node.size = min.node.size,
honesty = honesty,
honesty.fraction = honesty.fraction,
honesty.prune.leaves = honesty.prune.leaves,
alpha = alpha,
imbalance.penalty = imbalance.penalty,
ci.group.size = ci.group.size,
compute.oob.predictions = compute.oob.predictions,
num.threads = num.threads,
seed = seed,
legacy.seed = get_legacy_seed())
forest <- do.call.rcpp(regression_train, c(data, args))
# chech<-forest$`_leaf_samples`[[1]]
# for(oo in 1:length(chech)){
#   print(A[chech[[oo]]])
# }
# summary(forest$predictions[A==0])
# plot(forest$predictions,A)
W.hat <- forest$predictions
W.centered <- A - W.hat
W.centered <-W.centered*1.2
# W.centered<-rep(1,n)
num.trees = 3000;
sample.weights = NULL;
clusters = NULL;
equalize.cluster.weights = FALSE;
sample.fraction = 0.5;
# mtry = min(ceiling(sqrt(ncol(X)) + 20), ncol(X));
mtry = 10;
min.node.size = 5;
honesty = TRUE;
honesty.fraction = 0.5;
honesty.prune.leaves = TRUE;
alpha = 0.05;
imbalance.penalty = 0;
ci.group.size = 5;
tune.parameters = "none";
tune.num.trees = 50;
tune.num.reps = 100;
tune.num.draws = 1000;
compute.oob.predictions = TRUE;
num.threads = NULL;
seed = runif(1, 0, .Machine$integer.max)
validate_sample_weights(sample.weights, X)
Y <- validate_observations(Y, X)
clusters <- validate_clusters(clusters, X)
samples.per.cluster <- validate_equalize_cluster_weights(equalize.cluster.weights, clusters, sample.weights)
num.threads <- validate_num_threads(num.threads)
all.tunable.params <- c("sample.fraction", "mtry", "min.node.size", "honesty.fraction",
"honesty.prune.leaves", "alpha", "imbalance.penalty")
default.parameters <- list(sample.fraction = 0.5,
mtry =10, # min(ceiling(sqrt(ncol(X)) + 20), ncol(X)),
min.node.size = 7,
honesty.fraction = 0.5,
honesty.prune.leaves = TRUE,
alpha = 0.05,
imbalance.penalty = 0)
# data<-list()
data <- create_train_matrices(X, outcome = truth_Y, instrument = W.centered,sample.weights = sample.weights,numerator_1=numerator_1,denominator_1=denominator_1,add_1=add_1,
numerator_0=numerator_0,denominator_0=denominator_0,add_0=add_0)
#truth_Y
# # data$train.matrix<-as.matrix(cbind(X,Y,numerator_1,denominator_1,add_1,numerator_0,denominator_0,add_0,rep(1,n)))
# # data[['outcome_index']]<-2
# # data[['sample_weight_index']]<-9
# # data[['use_sample_weights']]<-FALSE
# # data[['numerator_1_index']]<-3
# # data[['denominator_1_index']]<-4
# # data[['add_1_index']]<-5
# # data[['numerator_0_index']]<-6
# # data[['denominator_0_index']]<-7
# # data[['add_0_index']]<-8
#
mtry=7
args <- list(num.trees = 2000,
clusters = clusters,
samples.per.cluster = samples.per.cluster,
sample.fraction = sample.fraction,
mtry = mtry,
min.node.size = min.node.size,
honesty = honesty,
honesty.fraction = honesty.fraction,
honesty.prune.leaves = honesty.prune.leaves,
alpha = alpha,
imbalance.penalty = imbalance.penalty,
ci.group.size = ci.group.size,
compute.oob.predictions = compute.oob.predictions,
num.threads = num.threads,
seed = seed,
legacy.seed = get_legacy_seed())
forest <- do.call.rcpp(regression_train, c(data, args))
preds = forest
df = data.frame(predictions = preds$predictions,
truth = truth_Y,
upper = preds$predictions + 1.96*sqrt(preds$variance.estimates),
lower = preds$predictions - 1.96*sqrt(preds$variance.estimates))
truth = truth_Y
# plot( preds$predictions,truth_Y)
#
######dn_start######
# phi1_1<-(A-pr_A_1_given_A_star_0_X)*(Z-pr_Z_1_given_A_star_0_X)*(Y-E_Y_given_A_star_1_X)+E_Y_given_A_star_1_X*((pr_A_1_given_A_star_1_X-pr_A_1_given_A_star_0_X)*(pr_Z_1_given_A_star_1_X-pr_Z_1_given_A_star_0_X)*(pr_A_S_1_given_X))
# phi1_2<-((pr_A_1_given_A_star_1_X-pr_A_1_given_A_star_0_X)*(pr_Z_1_given_A_star_1_X-pr_Z_1_given_A_star_0_X)*(pr_A_S_1_given_X))
#
# phi0_1<-(A-pr_A_1_given_A_star_1_X)*(Z-pr_Z_1_given_A_star_1_X)*(Y-E_Y_given_A_star_0_X)+E_Y_given_A_star_0_X*((pr_A_1_given_A_star_0_X-pr_A_1_given_A_star_1_X)*(pr_Z_1_given_A_star_0_X-pr_Z_1_given_A_star_1_X)*(pr_A_S_0_given_X))
# phi0_2<-((pr_A_1_given_A_star_0_X-pr_A_1_given_A_star_1_X)*(pr_Z_1_given_A_star_0_X-pr_Z_1_given_A_star_1_X)*(pr_A_S_0_given_X))
# numerator_1<-(A-pr_A_1_given_A_star_0_X)*(Z-pr_Z_1_given_A_star_0_X)*(Y-E_Y_given_A_star_1_X)
# denominator_1<-((pr_A_1_given_A_star_1_X-pr_A_1_given_A_star_0_X)*(pr_Z_1_given_A_star_1_X-pr_Z_1_given_A_star_0_X)*(pr_A_S_1_given_X))
# add_1<-E_Y_given_A_star_1_X
# numerator_0<-(A-pr_A_1_given_A_star_1_X)*(Z-pr_Z_1_given_A_star_1_X)*(Y-E_Y_given_A_star_0_X)
# denominator_0<-((pr_A_1_given_A_star_0_X-pr_A_1_given_A_star_1_X)*(pr_Z_1_given_A_star_0_X-pr_Z_1_given_A_star_1_X)*(pr_A_S_0_given_X))
# add_0<-E_Y_given_A_star_0_X
#
# psi<-list()
# psi[["numerator"]]<-phi1_1*phi0_2-phi0_1*phi1_2
# psi[["denominator"]]<-phi0_2*phi1_2
#
# # W.centered <- rep(1,n)
#
# X<-data[,c('X1','X2')]
# num.trees= 2000
# clusters= numeric(0)
# sample.weights = NULL;equalize.cluster.weights = FALSE
# samples.per.cluster<- validate_equalize_cluster_weights(equalize.cluster.weights, clusters, sample.weights)
# sample.fraction= 0.5
# mtry= 9
# min.node.size= 5
# honesty= TRUE
# honesty.fraction= 0.5
# honesty.prune.leaves= TRUE
# alpha= 0.05
# imbalance.penalty= 0
# stabilize.splits= TRUE
# ci.group.size= 4
# compute.oob.predictions = TRUE
# num.threads= 0
# seed=runif(1, 0, .Machine$integer.max)
# tune.parameters = "none";
# data <- create_train_matrices(X, outcome = A, sample.weights = sample.weights,numerator_1=numerator_1,denominator_1=denominator_1,add_1=add_1,
#                               numerator_0=numerator_0,denominator_0=denominator_0,add_0=add_0)
# args <- list(num.trees = num.trees,
#              clusters = clusters,
#              samples.per.cluster = samples.per.cluster,
#              sample.fraction = sample.fraction,
#              mtry = mtry,
#              min.node.size = min.node.size,
#              honesty = honesty,
#              honesty.fraction = honesty.fraction,
#              honesty.prune.leaves = honesty.prune.leaves,
#              alpha = alpha,
#              imbalance.penalty = imbalance.penalty,
#              ci.group.size = ci.group.size,
#              compute.oob.predictions = compute.oob.predictions,
#              num.threads = num.threads,
#              seed = seed,
#              legacy.seed = get_legacy_seed())
# forest <- do.call.rcpp(regression_train, c(data, args))
#
# # forest.W <- regression_forest(X,A, num.trees = max(50, num.trees / 4),
# #                               sample.weights = sample.weights, clusters = clusters,
# #                               equalize.cluster.weights = equalize.cluster.weights,
# #                               sample.fraction = sample.fraction, mtry = mtry,
# #                               min.node.size = 5, honesty = TRUE,
# #                               honesty.fraction = 0.5, honesty.prune.leaves = TRUE,
# #                               alpha = alpha, imbalance.penalty = imbalance.penalty,
# #                               ci.group.size = 1, tune.parameters = tune.parameters,
# #                               compute.oob.predictions = TRUE,
# #                               num.threads = num.threads, seed = seed)
# W.hat <- forest$predictions
# W.hat<- rep(0,n)
# W.centered <- A - W.hat
# D<- rep(1,n)
# sample.weights<- NULL
#
# data_ <- create_train_matrices(X,
#                               treatment = W.centered,
#                               survival.numerator = psi[["numerator"]],
#                               survival.denominator = psi[["denominator"]],
#                               censor = D,
#                               sample.weights = sample.weights)
#
# args <- list(num.trees = num.trees,
#              clusters = clusters,
#              samples.per.cluster = samples.per.cluster,
#              sample.fraction = sample.fraction,
#              mtry = mtry,
#              min.node.size = min.node.size,
#              honesty = honesty,
#              honesty.fraction = honesty.fraction,
#              honesty.prune.leaves = honesty.prune.leaves,
#              alpha = alpha,
#              imbalance.penalty = imbalance.penalty,
#              stabilize.splits = stabilize.splits,
#              ci.group.size = ci.group.size,
#              compute.oob.predictions = compute.oob.predictions,
#              num.threads = num.threads,
#              seed = seed,
#              legacy.seed = get_legacy_seed())
# forest <- do.call.rcpp(causal_survival_train, c(data_,args))
# truth_Y<-(X1+X2)
# df = data.frame(predictions = forest$predictions,
#                 truth = truth_Y,
#                 upper = forest$predictions + 1.96*sqrt(forest$variance.estimates),
#                 lower = forest$predictions - 1.96*sqrt(forest$variance.estimates))
# truth = truth_Y
#dn_end
# plot(truth_Y,forest$predictions)
######coverage part#####
percent_llf=0;avg_llf=0;
n<-nrow(df)
for(i in 1:n){
xlow = ifelse(is.na(df$lower[i]),0,df$lower[i])
xup = ifelse(is.na(df$upper[i]),0,df$upper[i])
truthi = truth[i]
if(xlow <= truthi && truthi <= xup){
percent_llf = percent_llf + 1;
}
avg_llf = avg_llf + abs(xup - xlow)
}
percent_llf = percent_llf/n;percent_llf
mse<-sum((forest$predictions-truth_Y)^2)/n;mse
avg_llf = avg_llf/n;avg_llf
data_cov<-rbind(data_cov,c(percent_llf,mse,avg_llf))
print(c(percent_llf,mse,avg_llf))
list_save[[(time_)]]<-df
time_=time_+1
print(time_)
}
