res = res_counting
args <- args_counting
## we realize it is in the mlt.m, specifically mlt_exact and mlt_interval
args_ltrc_rc <- readRDS("/Users/wyao/Downloads/args_ltrc_rc.rds")
args_counting <- readRDS("/Users/wyao/Downloads/args_counting.rds")
# eY <- .mm_exact(model, data = data, response = response, object = y)
# iY <- .mm_interval(model, data = data, response = response, object = y)
args <- args_ltrc_rc
args <- args_counting
## save immediate results res_ltrc_rc and res_counting, that has "response" and "y", to be used in .mm_exact, .mm_interval
res_ltrc_rc <- readRDS('/Users/wyao/Downloads/formm_ltrc_rc.rds')
res_counting <- readRDS('/Users/wyao/Downloads/formm_counting.rds')
res = res_counting
args <- args_counting
model = args$model
results_all <- readRDS("/Users/wyao/Dropbox/RESEARCH/post_selection/high_dimension/results_13_new.rds")
print(c(round(nrow(results_all[results_all$ovlp1==5,])),
mean(results_all$Pr.1[results_all$ovlp1==5]<0.1)))
print(c(round(nrow(results_all[results_all$ovlp1==5,])),
mean(results_all$Pr.2[results_all$ovlp1==5]<0.1)))
print(c(round(nrow(results_all[results_all$ovlp1==5,])),
mean(results_all$Pr.1[results_all$ovlp1==5]<0.05)))
print(c(round(nrow(results_all[results_all$ovlp1==5,])),
mean(results_all$Pr.2[results_all$ovlp1==5]<0.05)))
results_all_less5 = results_all[results_all$ovlp1<5,]
print(c(round(nrow(results_all_less5[results_all_less5$ovlp_diff>0,])),
sum(results_all_less5$Pr.1[results_all_less5$ovlp_diff>0]<0.05)/nrow(results_all_less5[results_all_less5$ovlp_diff>0,])))
print(c(round(nrow(results_all_less5[results_all_less5$ovlp_diff>0,])),
sum(results_all_less5$Pr.2[results_all_less5$ovlp_diff>0]<0.05)/nrow(results_all_less5[results_all_less5$ovlp_diff>0,])))
print(c(round(nrow(results_all_less5[results_all_less5$ovlp_diff>0,])),
sum(results_all_less5$Pr.1[results_all_less5$ovlp_diff>0]<0.1)/nrow(results_all_less5[results_all_less5$ovlp_diff>0,])))
print(c(round(nrow(results_all_less5[results_all_less5$ovlp_diff>0,])),
sum(results_all_less5$Pr.2[results_all_less5$ovlp_diff>0]<0.1)/nrow(results_all_less5[results_all_less5$ovlp_diff>0,])))
print(c(round(nrow(results_all_less5[results_all_less5$ovlp_diff==0,])),
sum(results_all_less5$Pr.1[results_all_less5$ovlp_diff==0]<0.1)/nrow(results_all_less5[results_all_less5$ovlp_diff==0,])))
print(c(round(nrow(results_all_less5[results_all_less5$ovlp_diff==0,])),
sum(results_all_less5$Pr.2[results_all_less5$ovlp_diff==0]<0.1)/nrow(results_all_less5[results_all_less5$ovlp_diff==0,])))
print(c(round(nrow(results_all_less5)),
mean(results_all_less5$Pr.1<0.1)))
print(c(round(nrow(results_all_less5)),
mean(results_all_less5$Pr.1<0.1)))
print(c(round(nrow(results_all_less5)),
mean(results_all_less5$Pr.2<0.1)))
results_all <- readRDS("/Users/wyao/Dropbox/RESEARCH/post_selection/high_dimension/results_13_new.rds")
results_all_less5 = results_all[results_all$ovlp1<5,]
print(c(round(nrow(results_all_less5[results_all_less5$ovlp_diff>0,])),
sum(results_all_less5$Pr.1[results_all_less5$ovlp_diff>0]<0.1)/nrow(results_all_less5[results_all_less5$ovlp_diff>0,])))
print(c(round(nrow(results_all_less5[results_all_less5$ovlp_diff>0,])),
sum(results_all_less5$Pr.2[results_all_less5$ovlp_diff>0]<0.1)/nrow(results_all_less5[results_all_less5$ovlp_diff>0,])))
print(c(round(nrow(results_all_less5[results_all_less5$ovlp_diff==0,])),
sum(results_all_less5$Pr.1[results_all_less5$ovlp_diff==0]<0.1)/nrow(results_all_less5[results_all_less5$ovlp_diff==0,])))
print(c(round(nrow(results_all_less5[results_all_less5$ovlp_diff==0,])),
sum(results_all_less5$Pr.2[results_all_less5$ovlp_diff==0]<0.1)/nrow(results_all_less5[results_all_less5$ovlp_diff==0,])))
mean(results_all$Pr.1[results_all$ovlp1==5]<0.05)))
mean(results_all$Pr.1[results_all$ovlp1==5]<0.05))
mean(results_all$Pr.1[results_all$ovlp1==5]<0.05)
mean(results_all$Pr.1[results_all$ovlp1==5]<0.01)
mean(results_all$Pr.1[results_all$ovlp1==5]<0.001)
mean(results_all_less5$Pr.1[results_all_less5$ovlp_diff==0]<0.001)
mean(results_all_less5$Pr.1[results_all_less5$ovlp_diff>0]<0.001)
print(c(nrow(results_all_less5[results_all_less5$ovlp_diff>0,]),
mean(results_all_less5$Pr.1[results_all_less5$ovlp_diff>0]<0.1)))
print(c(nrow(results_all_less5[results_all_less5$ovlp_diff>0,]),
mean(results_all_less5$Pr.2[results_all_less5$ovlp_diff>0]<0.1)))
## ovlp_diff = 0
print(c(nrow(results_all_less5[results_all_less5$ovlp_diff==0,]),
mean(results_all_less5$Pr.1[results_all_less5$ovlp_diff==0]<0.1)))
print(c(nrow(results_all_less5[results_all_less5$ovlp_diff==0,]),
mean(results_all_less5$Pr.2[results_all_less5$ovlp_diff==0]<0.1)))
print(sprintf('#{ovlp_diff = 0} = %1.0f, %{unadjusted p-values < 0.1} = 1.2%f',
nrow(results_all_less5[results_all_less5$ovlp_diff==0,]),
mean(results_all_less5$Pr.1[results_all_less5$ovlp_diff==0]<0.1)))
## ovlp_diff = 0
print(sprintf('#{ovlp_diff = 0} = %1.0f, %{unadjusted p-values < 0.1} = %1.2f',
nrow(results_all_less5[results_all_less5$ovlp_diff==0,]),
mean(results_all_less5$Pr.1[results_all_less5$ovlp_diff==0]<0.1)))
## ovlp_diff = 0
print(sprintf('#{ovlp_diff = 0} = %1.0f, \%{unadjusted p-values < 0.1} = %1.2f',
nrow(results_all_less5[results_all_less5$ovlp_diff==0,]),
mean(results_all_less5$Pr.1[results_all_less5$ovlp_diff==0]<0.1)))
## ovlp_diff = 0
print(sprintf('#{ovlp_diff = 0} = %1.0f, {unadjusted p-values < 0.1} = %1.2f',
nrow(results_all_less5[results_all_less5$ovlp_diff==0,]),
mean(results_all_less5$Pr.1[results_all_less5$ovlp_diff==0]<0.1)))
## ovlp_diff = 0
print(sprintf('#{ovlp_diff = 0} = %1.0f, %%{unadjusted p-values < 0.1} = %1.2f',
nrow(results_all_less5[results_all_less5$ovlp_diff==0,]),
mean(results_all_less5$Pr.1[results_all_less5$ovlp_diff==0]<0.1)))
print(sprintf('#{ovlp_diff = 0} = %1.0f, %%{adjusted p-values < 0.1} = %1.2f',
nrow(results_all_less5[results_all_less5$ovlp_diff==0,]),
mean(results_all_less5$Pr.2[results_all_less5$ovlp_diff==0]<0.1)))
print(sprintf('#{ovlp1 == 5} = %1.0f, %%{unadjusted p-values < 0.1} = %1.2f',
nrow(results_all[results_all$ovlp1==5,])),
mean(results_all$Pr.1[results_all$ovlp1==5]<0.1))
nrow(results_all[results_all$ovlp1==5,]))
print(sprintf('#{ovlp1 == 5} = %1.0f, %%{unadjusted p-values < 0.1} = %1.2f',
nrow(results_all[results_all$ovlp1==5,]),
mean(results_all$Pr.1[results_all$ovlp1==5]<0.1)))
print(sprintf('%%{ovlp1 == 5} = %1.0f (#{ovlp1 == 5} = %1.0f), %%{unadjusted p-values < 0.1} = %1.2f',
nrow(results_all[results_all$ovlp1==5,])/nrow(results_all),
nrow(results_all[results_all$ovlp1==5,]),
mean(results_all$Pr.1[results_all$ovlp1==5]<0.1)))
print(sprintf('%%{ovlp1 == 5} = %1.0f (#{ovlp1 == 5} = %1.0f), %%{adjusted p-values < 0.1} = %1.2f',
nrow(results_all[results_all$ovlp1==5,])/nrow(results_all),
nrow(results_all[results_all$ovlp1==5,]),
mean(results_all$Pr.2[results_all$ovlp1==5]<0.1)))
print(sprintf('%%{ovlp1 == 5} = %1.0f (#{ovlp1 == 5} = %1.0f),\n %%{unadjusted p-values < 0.1} = %1.2f',
nrow(results_all[results_all$ovlp1==5,])/nrow(results_all),
nrow(results_all[results_all$ovlp1==5,]),
mean(results_all$Pr.1[results_all$ovlp1==5]<0.1)))
print(sprintf('%%{ovlp1 == 5} = %1.0f (#{ovlp1 == 5} = %1.0f),\\ %%{unadjusted p-values < 0.1} = %1.2f',
nrow(results_all[results_all$ovlp1==5,])/nrow(results_all),
nrow(results_all[results_all$ovlp1==5,]),
mean(results_all$Pr.1[results_all$ovlp1==5]<0.1)))
print(sprintf('%%{ovlp1 == 5} = %1.0f (#{ovlp1 == 5} = %1.0f), %%{unadjusted p-values < 0.1} = %1.2f',
nrow(results_all[results_all$ovlp1==5,])/nrow(results_all),
nrow(results_all[results_all$ovlp1==5,]),
mean(results_all$Pr.1[results_all$ovlp1==5]<0.1)))
print(sprintf('%%{ovlp1 == 5} = %1.0f (#{ovlp1 == 5} = %1.0f), %%{adjusted p-values < 0.1} = %1.2f',
nrow(results_all[results_all$ovlp1==5,])/nrow(results_all),
nrow(results_all[results_all$ovlp1==5,]),
mean(results_all$Pr.2[results_all$ovlp1==5]<0.1)))
nrow(results_all[results_all$ovlp1==5,])
nrow(results_all)
print(sprintf('%%{ovlp1 == 5} = %1.0f (#{ovlp1 == 5} = %1.0f), %%{unadjusted p-values < 0.1} = %1.2f',
nrow(results_all[results_all$ovlp1==5,])/nrow(results_all),
nrow(results_all[results_all$ovlp1==5,]),
mean(results_all$Pr.1[results_all$ovlp1==5]<0.1)))
nrow(results_all[results_all$ovlp1==5,])/nrow(results_all)
print(sprintf('%%{ovlp1 == 5} = %1.2f (#{ovlp1 == 5} = %1.0f), %%{unadjusted p-values < 0.1} = %1.2f',
nrow(results_all[results_all$ovlp1==5,])/nrow(results_all),
nrow(results_all[results_all$ovlp1==5,]),
mean(results_all$Pr.1[results_all$ovlp1==5]<0.1)))
print(sprintf('%%{ovlp1 == 5} = %1.2f (#{ovlp1 == 5} = %1.0f), %%{adjusted p-values < 0.1} = %1.2f',
nrow(results_all[results_all$ovlp1==5,])/nrow(results_all),
nrow(results_all[results_all$ovlp1==5,]),
mean(results_all$Pr.2[results_all$ovlp1==5]<0.1)))
print(sprintf('#{ovlp_diff = 0} = %1.0f (#{ovlp1 < 5} = %1.0f), %%{unadjusted p-values < 0.1} = %1.2f',
nrow(results_all_less5[results_all_less5$ovlp_diff==0,]),
nrow(results_all_less5),
mean(results_all_less5$Pr.1[results_all_less5$ovlp_diff==0]<0.1)))
print(sprintf('#{ovlp_diff = 0} = %1.0f (#{ovlp1 < 5} = %1.0f), %%{adjusted p-values < 0.1} = %1.2f',
nrow(results_all_less5[results_all_less5$ovlp_diff==0,]),
nrow(results_all_less5),
mean(results_all_less5$Pr.2[results_all_less5$ovlp_diff==0]<0.1)))
results1 <- readRDS("/Users/wyao/Dropbox/RESEARCH/post_selection/high_dimension/results_13_new.rds")
results2 <- readRDS("/Users/wyao/Dropbox/RESEARCH/post_selection/high_dimension/results_13_new2.rds")
results_all <- rbind(results,results2)
results1 <- readRDS("/Users/wyao/Dropbox/RESEARCH/post_selection/high_dimension/results_13_new.rds")
results2 <- readRDS("/Users/wyao/Dropbox/RESEARCH/post_selection/high_dimension/results_13_new2.rds")
results_all <- rbind(results1,results2)
nrow(results_all)
results[1,]
results1[1,]
results2[1,]
###### The rejection rate when the null is true
print(sprintf('%%{ovlp1 == 5} = %1.2f (#{ovlp1 == 5} = %1.0f), %%{unadjusted p-values < 0.1} = %1.2f',
nrow(results_all[results_all$ovlp1==5,])/nrow(results_all),
nrow(results_all[results_all$ovlp1==5,]),
mean(results_all$Pr.1[results_all$ovlp1==5]<0.1)))
print(sprintf('%%{ovlp1 == 5} = %1.2f (#{ovlp1 == 5} = %1.0f), %%{adjusted p-values < 0.1} = %1.2f',
nrow(results_all[results_all$ovlp1==5,])/nrow(results_all),
nrow(results_all[results_all$ovlp1==5,]),
mean(results_all$Pr.2[results_all$ovlp1==5]<0.1)))
###### When the alternative is true
## take out the instances where ovlp1<5 -- alternative is true
results_all_less5 = results_all[results_all$ovlp1<5,]
## ovlp_diff > 0
print(sprintf('#{ovlp_diff > 0} = %1.0f (#{ovlp1 < 5} = %1.0f), %%{unadjusted p-values < 0.1} = %1.2f',
nrow(results_all_less5[results_all_less5$ovlp_diff>0,]),
nrow(results_all_less5),
mean(results_all_less5$Pr.1[results_all_less5$ovlp_diff>0]<0.1)))
print(sprintf('#{ovlp_diff > 0} = %1.0f (#{ovlp1 < 5} = %1.0f), %%{adjusted p-values < 0.1} = %1.2f',
nrow(results_all_less5[results_all_less5$ovlp_diff>0,]),
nrow(results_all_less5),
mean(results_all_less5$Pr.2[results_all_less5$ovlp_diff>0]<0.1)))
## ovlp_diff = 0
print(sprintf('#{ovlp_diff = 0} = %1.0f (#{ovlp1 < 5} = %1.0f), %%{unadjusted p-values < 0.1} = %1.2f',
nrow(results_all_less5[results_all_less5$ovlp_diff==0,]),
nrow(results_all_less5),
mean(results_all_less5$Pr.1[results_all_less5$ovlp_diff==0]<0.1)))
print(sprintf('#{ovlp_diff = 0} = %1.0f (#{ovlp1 < 5} = %1.0f), %%{adjusted p-values < 0.1} = %1.2f',
nrow(results_all_less5[results_all_less5$ovlp_diff==0,]),
nrow(results_all_less5),
mean(results_all_less5$Pr.2[results_all_less5$ovlp_diff==0]<0.1)))
## ovlp_diff = 0
print(sprintf('#{ovlp_diff = 0} = %1.0f (#{ovlp1 < 5} = %1.0f), %%{unadjusted p-values < 0.1} = %1.3f',
nrow(results_all_less5[results_all_less5$ovlp_diff==0,]),
nrow(results_all_less5),
mean(results_all_less5$Pr.1[results_all_less5$ovlp_diff==0]<0.1)))
print(sprintf('#{ovlp_diff = 0} = %1.0f (#{ovlp1 < 5} = %1.0f), %%{adjusted p-values < 0.1} = %1.3f',
nrow(results_all_less5[results_all_less5$ovlp_diff==0,]),
nrow(results_all_less5),
mean(results_all_less5$Pr.2[results_all_less5$ovlp_diff==0]<0.1)))
## ovlp_diff > 0
print(sprintf('#{ovlp_diff > 0} = %1.0f (#{ovlp1 < 5} = %1.0f), %%{unadjusted p-values < 0.1} = %1.3f',
nrow(results_all_less5[results_all_less5$ovlp_diff>0,]),
nrow(results_all_less5),
mean(results_all_less5$Pr.1[results_all_less5$ovlp_diff>0]<0.1)))
print(sprintf('#{ovlp_diff > 0} = %1.0f (#{ovlp1 < 5} = %1.0f), %%{adjusted p-values < 0.1} = %1.3f',
nrow(results_all_less5[results_all_less5$ovlp_diff>0,]),
nrow(results_all_less5),
mean(results_all_less5$Pr.2[results_all_less5$ovlp_diff>0]<0.1)))
print(sprintf('#{ovlp_diff = 2} = %1.0f (#{ovlp1 < 5} = %1.0f), %%{unadjusted p-values < 0.1} = %1.3f',
nrow(results_all_less5[results_all_less5$ovlp_diff==2,]),
nrow(results_all_less5),
mean(results_all_less5$Pr.1[results_all_less5$ovlp_diff==2]<0.1)))
print(sprintf('#{ovlp_diff = 2} = %1.0f (#{ovlp1 < 5} = %1.0f), %%{adjusted p-values < 0.1} = %1.3f',
nrow(results_all_less5[results_all_less5$ovlp_diff==2,]),
nrow(results_all_less5),
mean(results_all_less5$Pr.2[results_all_less5$ovlp_diff==2]<0.1)))
results <- data.frame(results_all)
summary(unlist(results$min))
summary(unlist(results$max))
df <- results %>%
mutate(id = 1:n()) %>%
gather(key, value, -khat1, -khat2, -ovlp1, -ovlp_diff, -id, -min, -max) %>%
separate(key, into = c("var", "pvalue")) %>%
spread(key = var, value) %>%
unnest() %>%
mutate(pvalue = recode(as.factor(pvalue), `1` = "Naive", `2` = "Adjusted"))
library(tidyverse)
library(knitr)
library(selectiveInference)
df <- results %>%
mutate(id = 1:n()) %>%
gather(key, value, -khat1, -khat2, -ovlp1, -ovlp_diff, -id, -min, -max) %>%
separate(key, into = c("var", "pvalue")) %>%
spread(key = var, value) %>%
unnest() %>%
mutate(pvalue = recode(as.factor(pvalue), `1` = "Naive", `2` = "Adjusted"))
models <- table(df$khat1, df$ovlp1, df$khat2, df$ovlp_diff)/2
tb <- expand.grid(rownames(models), colnames(models))
tb$value <- sapply(1:nrow(tb), function(i) models[tb[i,1], tb[i,2],tb[i,3], tb[i,4]])
fstepbic <- ggplot(tb, aes(Var1, Var2)) +
geom_tile(aes(fill = value)) +
geom_text(aes(label = value), color="white") +
scale_x_discrete(expand = c(0,0)) +
scale_y_discrete(expand = c(0,0)) +
scale_fill_gradient("Instances", low = "white", high = "black") +
xlab("Selected sparsity") + ylab("True positives") +
theme_minimal()
fstepbic
df <- results %>%
mutate(id = 1:n()) %>%
gather(key, value, -khat1, -khat2, -ovlp1, -ovlp_diff, -id, -min, -max) %>%
separate(key, into = c("var", "pvalue")) %>%
spread(key = var, value) %>%
unnest() %>%
mutate(pvalue = recode(as.factor(pvalue), `1` = "Naive", `2` = "Adjusted"))
nrow(results)
models <- table(df$khat1, df$ovlp1)/2
tb <- expand.grid(rownames(models), colnames(models))
tb$value <- sapply(1:nrow(tb), function(i) models[tb[i,1], tb[i,2]])
fstepbic <- ggplot(tb, aes(Var1, Var2)) +
geom_tile(aes(fill = value)) +
geom_text(aes(label = value), color="white") +
scale_x_discrete(expand = c(0,0)) +
scale_y_discrete(expand = c(0,0)) +
scale_fill_gradient("Instances", low = "white", high = "black") +
xlab("Selected sparsity") + ylab("True positives") +
theme_minimal()
fstepbic
tb[i,1]
tb[,1]
tb
df
models <- table(df$khat1, df$ovlp1)/2
models
df$ovlp1+df$ovlp_diff
colnames(models)
models <- table(df$khat2, df$ovlp1+df$ovlp_diff)/2
tb <- expand.grid(rownames(models), colnames(models))
tb$value <- sapply(1:nrow(tb), function(i) models[tb[i,1], tb[i,2]])
fstepbic <- ggplot(tb, aes(Var1, Var2)) +
geom_tile(aes(fill = value)) +
geom_text(aes(label = value), color="white") +
scale_x_discrete(expand = c(0,0)) +
scale_y_discrete(expand = c(0,0)) +
scale_fill_gradient("Instances", low = "white", high = "black") +
xlab("Selected sparsity") + ylab("True positives") +
theme_minimal()
fstepbic
models <- table(df$khat1, df$ovlp1)/2
tb <- expand.grid(rownames(models), colnames(models))
tb$value <- sapply(1:nrow(tb), function(i) models[tb[i,1], tb[i,2]])
fstepbic <- ggplot(tb, aes(Var1, Var2)) +
geom_tile(aes(fill = value)) +
geom_text(aes(label = value), color="white") +
scale_x_discrete(expand = c(0,0)) +
scale_y_discrete(expand = c(0,0)) +
scale_fill_gradient("Instances", low = "white", high = "black") +
xlab("Selected sparsity") + ylab("True positives") +
theme_minimal()
fstepbic
models <- table(df$khat1, df$ovlp1)/2
tb <- expand.grid(rownames(models), colnames(models))
tb$value <- sapply(1:nrow(tb), function(i) models[tb[i,1], tb[i,2]])
fstepbic1 <- ggplot(tb, aes(Var1, Var2)) +
geom_tile(aes(fill = value)) +
geom_text(aes(label = value), color="white") +
scale_x_discrete(expand = c(0,0)) +
scale_y_discrete(expand = c(0,0)) +
scale_fill_gradient("Instances", low = "white", high = "black") +
xlab("Selected sparsity") + ylab("True positives") +
theme_minimal()
models <- table(df$khat2, df$ovlp1+df$ovlp_diff)/2
tb <- expand.grid(rownames(models), colnames(models))
tb$value <- sapply(1:nrow(tb), function(i) models[tb[i,1], tb[i,2]])
fstepbic2 <- ggplot(tb, aes(Var1, Var2)) +
geom_tile(aes(fill = value)) +
geom_text(aes(label = value), color="white") +
scale_x_discrete(expand = c(0,0)) +
scale_y_discrete(expand = c(0,0)) +
scale_fill_gradient("Instances", low = "white", high = "black") +
xlab("Selected sparsity") + ylab("True positives") +
theme_minimal()
pdf_file_name <- sprintf("/Users/wyao/Dropbox/RESEARCH/post-selection/high_dimension/fstepbic.pdf")
myplot <- grid.arrange(fstepbic1,fstepbic2,
#top = textGrob("Spend and Number of Players by Game",
#gp=gpar(fontsize=16,font=2)),
nrow = 2)
ggsave(file=pdf_file_name, myplot)
library(grid)
library(gridExtra)
install.packages("gridExtra")
library(grid)
library(gridExtra)
pdf_file_name <- sprintf("/Users/wyao/Dropbox/RESEARCH/post-selection/high_dimension/fstepbic.pdf")
pdf(pdf_file_name, width=14)
pdf_file_name <- sprintf("/Users/wyao/Dropbox/RESEARCH/post_selection/high_dimension/fstepbic.pdf")
pdf(pdf_file_name, width=14)
myplot <- grid.arrange(fstepbic1,fstepbic2,
#top = textGrob("Spend and Number of Players by Game",
#gp=gpar(fontsize=16,font=2)),
nrow = 2)
ggsave(file=pdf_file_name, myplot)
pdf_file_name <- sprintf("/Users/wyao/Dropbox/RESEARCH/post_selection/high_dimension/fstepbic.pdf")
pdf(pdf_file_name, width=10)
myplot <- grid.arrange(fstepbic1,fstepbic2,
#top = textGrob("Spend and Number of Players by Game",
#gp=gpar(fontsize=16,font=2)),
nrow = 2)
ggsave(file=pdf_file_name, myplot)
pdf_file_name <- sprintf("/Users/wyao/Dropbox/RESEARCH/post_selection/high_dimension/fstepbic.pdf")
pdf(pdf_file_name, width=8)
myplot <- grid.arrange(fstepbic1,fstepbic2,
#top = textGrob("Spend and Number of Players by Game",
#gp=gpar(fontsize=16,font=2)),
nrow = 2)
ggsave(file=pdf_file_name, myplot)
pdf_file_name <- sprintf("/Users/wyao/Dropbox/RESEARCH/post_selection/high_dimension/fstepbic.pdf")
pdf(pdf_file_name, width=8.75)
myplot <- grid.arrange(fstepbic1,fstepbic2,
#top = textGrob("Spend and Number of Players by Game",
#gp=gpar(fontsize=16,font=2)),
nrow = 2)
ggsave(file=pdf_file_name, myplot)
fdists1 <- ggplot(df, aes(Pr, linetype = as.factor(pvalue))) +
geom_density() + #aes(color = as.ordered(overlap))) +
facet_wrap(~ as.factor(ovlp1), nrow = 2, scales = "free") +
xlab("p-value") + ggtitle("Overlap of RIC selected model") +
theme_minimal()
fdists1
fdists1 <- ggplot(df, aes(Pr, linetype = as.factor(pvalue))) +
geom_density() + #aes(color = as.ordered(overlap))) +
facet_wrap(~ as.factor(ovlp1), nrow = 2, scales = "free") +
xlab("p-value") + ggtitle("Overlap of RIC selected model") +
theme_minimal()
fdists1
df[1,]
fdists2 <- ggplot(df_subset, aes(Pr)) +
geom_density() + #aes(color = as.ordered(overlap))) +
facet_wrap(~ as.factor(ovlp1), nrow = 1) +
xlab("p-value") + ggtitle("Selected overlap") + theme_minimal()
df_subset <- df %>% filter(pvalue == "Adjusted")
fdists2 <- ggplot(df_subset, aes(Pr)) +
geom_density() + #aes(color = as.ordered(overlap))) +
facet_wrap(~ as.factor(ovlp1), nrow = 1) +
xlab("p-value") + ggtitle("Selected overlap") + theme_minimal()
fdists2
nrow(results_all_less5[results_all_less5$ovlp_diff==2,]
)
nrow(results_all_less5[results_all_less5$ovlp_diff==1,])
results_all_less5[results_all_less5$ovlp_diff==2,]
results_all_less5[results_all_less5$ovlp_diff==2,c(1,2,3,4,7,10)]
mean(results_all_less5[results_all_less5$ovlp_diff==2,10])
mean(results_all_less5$Pr.2[results_all_less5$ovlp_diff==2])
results_all_less5$Pr.2[results_all_less5$ovlp_diff==2]
###### When the alternative is true
## take out the instances where ovlp1<5 -- alternative is true
results_all_less5 = results_all[results_all$ovlp1<5,]
results_all_less5[results_all_less5$ovlp_diff==2,]
results_all_less5[results_all_less5$ovlp_diff==2,10]
results_all_less5$ovlp_diff[results_all_less5$ovlp_diff==2]
as.vector(results_all_less5$Pr.2[results_all_less5$ovlp_diff==2])
vector(results_all_less5$Pr.2[results_all_less5$ovlp_diff==2])
as.numeric(results_all_less5$Pr.2[results_all_less5$ovlp_diff==2])
mean(as.numeric(results_all_less5$Pr.2[results_all_less5$ovlp_diff==2]))
mean(results_all_less5$Pr.2[results_all_less5$ovlp_diff==2]<0.1)
###### The rejection rate when the null is true
print(sprintf('%%{ovlp1 == 5} = %1.2f (#{ovlp1 == 5} = %1.0f), %%{unadjusted p-values < 0.1} = %1.3f',
nrow(results_all[results_all$ovlp1==5,])/nrow(results_all),
nrow(results_all[results_all$ovlp1==5,]),
mean(results_all$Pr.1[results_all$ovlp1==5]<0.1)))
print(sprintf('%%{ovlp1 == 5} = %1.2f (#{ovlp1 == 5} = %1.0f), %%{adjusted p-values < 0.1} = %1.3f',
nrow(results_all[results_all$ovlp1==5,])/nrow(results_all),
nrow(results_all[results_all$ovlp1==5,]),
mean(results_all$Pr.2[results_all$ovlp1==5]<0.1)))
results_all_less5 = results_all[results_all$ovlp1<5,]
## ovlp_diff > 0
print(sprintf('#{ovlp_diff > 0} = %1.0f (#{ovlp1 < 5} = %1.0f), %%{unadjusted p-values < 0.1} = %1.3f',
nrow(results_all_less5[results_all_less5$ovlp_diff>0,]),
nrow(results_all_less5),
mean(results_all_less5$Pr.1[results_all_less5$ovlp_diff>0]<0.1)))
print(sprintf('#{ovlp_diff > 0} = %1.0f (#{ovlp1 < 5} = %1.0f), %%{adjusted p-values < 0.1} = %1.3f',
nrow(results_all_less5[results_all_less5$ovlp_diff>0,]),
nrow(results_all_less5),
mean(results_all_less5$Pr.2[results_all_less5$ovlp_diff>0]<0.1)))
print(sprintf('#{ovlp_diff = 0} = %1.0f (#{ovlp1 < 5} = %1.0f), %%{unadjusted p-values < 0.1} = %1.3f',
nrow(results_all_less5[results_all_less5$ovlp_diff==0,]),
nrow(results_all_less5),
mean(results_all_less5$Pr.1[results_all_less5$ovlp_diff==0]<0.1)))
print(sprintf('#{ovlp_diff = 0} = %1.0f (#{ovlp1 < 5} = %1.0f), %%{adjusted p-values < 0.1} = %1.3f',
nrow(results_all_less5[results_all_less5$ovlp_diff==0,]),
nrow(results_all_less5),
mean(results_all_less5$Pr.2[results_all_less5$ovlp_diff==0]<0.1)))
df <- results %>%
mutate(id = 1:n()) %>%
gather(key, value, -khat1, -khat2, -ovlp1, -ovlp_diff, -id, -min, -max) %>%
separate(key, into = c("var", "pvalue")) %>%
spread(key = var, value) %>%
unnest() %>%
mutate(pvalue = recode(as.factor(pvalue), `1` = "Naive", `2` = "Adjusted"))
models <- table(df$khat1, df$ovlp1)/2
tb <- expand.grid(rownames(models), colnames(models))
tb$value <- sapply(1:nrow(tb), function(i) models[tb[i,1], tb[i,2]])
fstepbic1 <- ggplot(tb, aes(Var1, Var2)) +
geom_tile(aes(fill = value)) +
geom_text(aes(label = value), color="white") +
scale_x_discrete(expand = c(0,0)) +
scale_y_discrete(expand = c(0,0)) +
scale_fill_gradient("Instances", low = "white", high = "black") +
xlab("Selected sparsity") + ylab("True positives") +
theme_minimal()
models <- table(df$khat2, df$ovlp1+df$ovlp_diff)/2
tb <- expand.grid(rownames(models), colnames(models))
tb$value <- sapply(1:nrow(tb), function(i) models[tb[i,1], tb[i,2]])
fstepbic2 <- ggplot(tb, aes(Var1, Var2)) +
geom_tile(aes(fill = value)) +
geom_text(aes(label = value), color="white") +
scale_x_discrete(expand = c(0,0)) +
scale_y_discrete(expand = c(0,0)) +
scale_fill_gradient("Instances", low = "white", high = "black") +
xlab("Selected sparsity") + ylab("True positives") +
theme_minimal()
pdf_file_name <- sprintf("/Users/wyao/Dropbox/RESEARCH/post_selection/high_dimension/document/fstepbic.pdf")
pdf(pdf_file_name, width=8.75)
myplot <- grid.arrange(fstepbic1,fstepbic2,
#top = textGrob("Spend and Number of Players by Game",
#gp=gpar(fontsize=16,font=2)),
nrow = 2)
ggsave(file=pdf_file_name, myplot)
models <- table(df$khat1, df$ovlp1)/2
tb <- expand.grid(rownames(models), colnames(models))
tb$value <- sapply(1:nrow(tb), function(i) models[tb[i,1], tb[i,2]])
fstepbic1 <- ggplot(tb, aes(Var1, Var2)) +
geom_tile(aes(fill = value)) +
geom_text(aes(label = value), color="white") +
scale_x_discrete(expand = c(0,0)) +
scale_y_discrete(expand = c(0,0)) +
scale_fill_gradient("Instances", low = "white", high = "black") +
xlab("Selected sparsity") + ylab("True positives") +
theme_minimal()
pdf_file_name <- sprintf("/Users/wyao/Dropbox/RESEARCH/post_selection/high_dimension/document/fstepbic1.pdf")
# pdf(pdf_file_name, width=8.75)
myplot <- grid.arrange(fstepbic1,fstepbic2,
#top = textGrob("Spend and Number of Players by Game",
#gp=gpar(fontsize=16,font=2)),
nrow = 2)
ggsave(file=pdf_file_name, myplot)
pdf_file_name <- sprintf("/Users/wyao/Dropbox/RESEARCH/post_selection/high_dimension/document/fstepbic1.pdf")
# pdf(pdf_file_name, width=8.75)
ggsave(file=pdf_file_name, fstepbic1)
pdf_file_name <- sprintf("/Users/wyao/Dropbox/RESEARCH/post_selection/high_dimension/document/fstepbic1.pdf")
pdf(pdf_file_name, width=8.75)
ggsave(file=pdf_file_name, fstepbic1)
fit2 <- groupfs(x, y, index = 1:ncol(x), k = 2*log(p), intercept = FALSE, center = FALSE, normalize = FALSE,
maxsteps = length(fit1$action)+5)
# ----
# High-dimension simulation code
# ----
set.seed(1)
n <- 100
p <- 200
active <- 1:5
nz <- 1/3
x <- matrix(rnorm(n*p), nrow = n)
beta <- rep(0, p)
beta[active] <- nz
beta[1:3] <- 1
mu <- x %*% beta
y <- mu + rnorm(n)
ybar <- mean(y)
y <- y - ybar
data <- data.frame(y = y, x = x)
fit1 <- groupfs(x, y, index = 1:ncol(x), k = 2*log(p), intercept = FALSE, center = FALSE, normalize = FALSE, aicstop = 1)
fit2 <- groupfs(x, y, index = 1:ncol(x), k = 2*log(p), intercept = FALSE, center = FALSE, normalize = FALSE,
maxsteps = length(fit1$action)+5)
fit1
fit2
attr(fit2, "stopped")
