library(readstata13)
library(dplyr)
library(tidyverse)
library(sandwich)
library(fastDummies)


# Read "nsw_dw.dta" and drop its first column.
dat <- read.dta13("nsw_dw.dta")[, -1]

# 0/1 flags marking rows whose re74 / re75 value is exactly zero.
dat$is_re74_0 <- as.numeric(dat$re74 == 0)
dat$is_re75_0 <- as.numeric(dat$re75 == 0)


# Four 0/1 stratum indicators from crossing age (cut at 26) with
# education (cut at 11):
#   S1: age >= 26, education >= 11    S2: age >= 26, education < 11
#   S3: age <  26, education >= 11    S4: age <  26, education < 11
S1 <- as.numeric(dat$age >= 26 & dat$education >= 11)
S2 <- as.numeric(dat$age >= 26 & dat$education < 11)
S3 <- as.numeric(dat$age < 26 & dat$education >= 11)
S4 <- as.numeric(dat$age < 26 & dat$education < 11)


# Treatment indicator, taken from the `treat` column.
D <- dat$treat

# Outcome: log(re78), with rows where re78 is exactly zero set to 0 rather
# than left at log(0) = -Inf.
Y <- log(dat$re78)
Y[dat$re78 == 0] <- 0


# Apply `stat` (e.g. mean or sd) to the outcomes Y of the units that fall in
# one stratum and have treatment indicator D equal to `arm`.
cell_stat <- function(stratum, arm, stat) {
  stat(Y[stratum & (D == arm)])
}

strata_masks <- list(S1, S2, S3, S4)

# Per-stratum outcome means and standard deviations, by treatment arm.
mu1_vec <- vapply(strata_masks, cell_stat, numeric(1), arm = 1, stat = mean)
mu0_vec <- vapply(strata_masks, cell_stat, numeric(1), arm = 0, stat = mean)
sd1_vec <- vapply(strata_masks, cell_stat, numeric(1), arm = 1, stat = sd)
sd0_vec <- vapply(strata_masks, cell_stat, numeric(1), arm = 0, stat = sd)

# Difference in arm means within each stratum, and each stratum's share of
# the sample.
tau <- mu1_vec - mu0_vec
true_p <- vapply(strata_masks, sum, numeric(1)) / length(Y)

# Echo the arm means and their difference, stratum by stratum.
mu1_vec[1] # E[Y(1)|S1]
mu0_vec[1] # E[Y(0)|S1]
tau.s1 <- tau[1]
tau.s1
mu1_vec[2] # E[Y(1)|S2]
mu0_vec[2] # E[Y(0)|S2]
tau.s2 <- tau[2]
tau.s2
mu1_vec[3] # E[Y(1)|S3]
mu0_vec[3] # E[Y(0)|S3]
tau.s3 <- tau[3]
tau.s3
mu1_vec[4] # E[Y(1)|S4]
mu0_vec[4] # E[Y(0)|S4]
tau.s4 <- tau[4]
tau.s4

# Inputs for the pairwise p-values between strata: the per-stratum effect
# estimate, a combined spread from the two arms' standard deviations, and
# the number of units in each stratum.
# NOTE(review): pvalue() is later called with sd_vec^2 divided by these
# whole-stratum counts; the sampling variance of a difference in arm means
# is usually sd1^2 / n1 + sd0^2 / n0 with per-arm counts -- confirm that the
# current form is intended.
mu_vec <- tau
sd_vec <- sqrt(sd1_vec * sd1_vec + sd0_vec * sd0_vec)
n <- vapply(list(S1, S2, S3, S4), sum, numeric(1))

# One-sided tail p-value for the difference between two means.
#
# The statistic is (mu1 - mu2) / sqrt(sd1^2 / n1 + sd2^2 / n2), i.e. it uses
# the unpooled standard error, and it is referred to a Student t
# distribution with n1 + n2 - 2 degrees of freedom. The value returned is
# the tail probability on the side of the statistic: the upper tail when it
# is positive, the lower tail otherwise. It is not doubled into a two-sided
# p-value.
#
# Arguments:
#   mu1, mu2  the two means being compared
#   sd1, sd2  the corresponding standard deviations
#   n1, n2    the corresponding sample sizes
# All arguments may be vectors; they are combined elementwise with the usual
# recycling.
#
# Returns a numeric vector of tail probabilities. Where the statistic is
# undefined (for example 0 / 0 when both spreads are zero and the means are
# equal) the result is NaN/NA rather than an error.
pvalue <- function(mu1, mu2, sd1, sd2, n1, n2) {
  t_stat <- (mu1 - mu2) / sqrt(sd1^2 / n1 + sd2^2 / n2)
  df <- n1 + n2 - 2
  # The t distribution is symmetric about zero, so the lower tail at a
  # non-positive statistic equals the upper tail at its absolute value. This
  # avoids branching on the sign, which fails for NaN and for vector input.
  pt(q = abs(t_stat), df = df, lower.tail = FALSE)
}

# 4 x 4 matrix of pairwise p-values between strata: entry [i, j] is
# pvalue() evaluated with stratum i's effect, spread and size as the first
# sample and stratum j's as the second. Each inner vapply() builds one
# column (j fixed, i running over the rows).
p <- vapply(
  1:4,
  function(col_idx) {
    vapply(
      1:4,
      function(row_idx) {
        pvalue(
          mu_vec[row_idx], mu_vec[col_idx],
          sd_vec[row_idx], sd_vec[col_idx],
          n[row_idx], n[col_idx]
        )
      },
      numeric(1)
    )
  },
  numeric(4)
)
