# Draw random (untrained) keras networks until the labeling they induce on
# `des` is not too imbalanced.  The random weights define the "true" model:
# predict_classes labels every descriptor row, then 100 labels are flipped
# as noise.  Rejected draws print "invalid" and retry.
while(1){
model = keras_model_sequential()
model %>%
layer_dense(units = 16, activation = 'relu', input_shape = c(25), kernel_regularizer = regularizer_l1(0.1)) %>%
layer_dense(units = 16, activation = 'relu', kernel_regularizer = regularizer_l1(0.1)) %>%
layer_dense(units = 2, activation = 'softmax', kernel_regularizer = regularizer_l1(0.1))
y = model %>% predict_classes(as.matrix(des))
# Flip 100 random labels (label noise).  seq_len(length(y)) instead of a
# hard-coded 1:1372 so the range always tracks nrow(des).
noise = sample(seq_len(length(y)), 100, replace = FALSE)
y[noise] = 1 - y[noise]
y = factor(y)
# Scalar condition: use short-circuiting && rather than elementwise &.
# Accept only labelings with both classes reasonably represented.
if(sum(y == 1) > 100 && sum(y == 1) < 1270){
break
}
print("invalid")
}
# y
#
# Convert a factor holding numeric labels back to its numeric values.
# (as.numeric() on a factor alone would return level indices, not labels.)
unfactor = function(x){
  as.numeric(as.character(x))
}
# Keep column 1 plus columns 11-25 (components 3-5) of the descriptors.
# NOTE(review): keeping only the 1st of the 10 component-1/2 columns looks
# deliberate but is undocumented -- confirm.
des = des[, c(1, seq(11, 25, 1))]
# one = one[, c(1, seq(6, 25, 1))]
library(randomForest)
library(rpart)
library(rpart.plot)
#forest_m1 = randomForest(y ~ x, data = dat_so)
# Random 200-row training sample, shared by both encodings.
id_des = sample(seq(1, nrow(des)), 200)
train_des = data.frame(y[id_des], des[id_des, ])
colnames(train_des)[1] = "y"
#forest_m1 = randomForest(y ~ ., train_des, ntree = 100)
# Depth-1 tree (stump) on the descriptor encoding, 10-fold CV for cptable.
forest_m1 = rpart(y ~ ., data = train_des, control = rpart.control(xval = 10, maxdepth = 1))
t = ((forest_m1)$cptable)
# var1 = number of splits in the final tree (last row, column 2 = nsplit).
var1 = t[dim(t)[1], 2]
# var1 = forest_var(des, y)
# Same fit on the one-hot encoding.
train_one = data.frame(y[id_des], one[id_des, ])
colnames(train_one)[1] = "y"
#forest_m2 = randomForest(y ~ ., train_one, ntree = 100)
forest_m2 = rpart(y ~ ., data = train_one, control = rpart.control(xval = 10, maxdepth = 1))
# rpart.plot(forest_m1)
t = ((forest_m2)$cptable)
var2 = t[dim(t)[1], 2]
# NOTE(review): rpart always produces a cptable, so t[dim(t)[1], 2] is
# numeric and these is.null() guards appear to be dead code -- confirm.
if(is.null(var2)){
var2 = 0
}
if(is.null(var1)){
var1 = 0
}
# var2 = forest_var(one, y)
# Training (ta_) and test (te_) misclassification rates for each encoding.
# The ^2 on a 0/1 logical comparison is a no-op.
ta_error_des = mean((unfactor(y[id_des]) != unfactor(predict(forest_m1, train_des, type = "class")))^2)
te_error_des = mean((unfactor(y[-id_des]) != unfactor(predict(forest_m1, des[-id_des, ], type = "class")))^2)
ta_error_one = mean((unfactor(y[id_des]) != unfactor(predict(forest_m2, train_one, type = "class")))^2)
te_error_one = mean((unfactor(y[-id_des]) != unfactor(predict(forest_m2, one[-id_des, ], type = "class")))^2)
# change of domain
# new domain resample the first two components
#Z = list()
#num = c(2, 2, 7, 7, 7)
# Resample the latent vectors of components 1-2 from N(1, 1) (shifted mean)
# to build the shifted domain; components 3-5 keep their existing Z.
for(i in 1:2){
zs = matrix(rnorm(num[i] * 5, 1, 1), nrow = num[i], ncol = 5)
Z[[i]] = zs
}
# generate descriptors
# Depth-first enumeration of every combination of one element per component;
# each leaf (i == 6) appends its concatenated 25-dim descriptor row to the
# global list des_new.
des_new <<- list()
f = function(x, i){
if(i == 6){
#print(x)
des_new <<- append(des_new, list(x))
return()
}
for(j in 1:num[i]){
xx = c(x, Z[[i]][j, ])
f(xx, i + 1)
}
}
f(c(), 1)
des_new = data.frame(des_new)
des_new = as.data.frame(t(as.matrix(des_new)))
rownames(des_new) = NULL
# generate onehot
# One-hot rows for the new domain: components 1-2 contribute all-zero
# blocks (their identities changed), components 3-5 keep their one-hot.
one_new <<- list()
f = function(x, i){
if(i == 6){
#print(x)
one_new <<- append(one_new, list(x))
return()
}
for(j in 1:num[i]){
t = rep(0, num[i])
if(i > 2){
t[j] = 1
}
xx = c(x, t)
f(xx, i + 1)
}
}
f(c(), 1)
one_new = data.frame(one_new)
one_new = as.data.frame(t(as.matrix(one_new)))
rownames(one_new) = NULL
# NOTE(review): y_new is predicted from one_new, whereas the original labels
# y were predicted from des -- confirm this asymmetry is intended.
y_new = model %>% predict_classes(as.matrix(one_new))
y_new = factor(y_new)
#var1 = forest_var(des, y, des_new, y_new)
#var2 = forest_var(one, y, one_new, y_new)
library(randomForest)
# Shifted-domain errors of the old-domain models; rows id_des are excluded
# to mirror the old-domain test split.
ta_error_des_new = mean((unfactor(y_new[-id_des]) != unfactor(predict(forest_m1, des_new, type = "class")[-id_des]))^2)
ta_error_one_new = mean((unfactor(y_new[-id_des]) != unfactor(predict(forest_m2, one_new, type = "class")[-id_des]))^2)
# Per-replication results: test errors, shifted-domain errors, split counts.
res[k, 1] = te_error_des
res[k, 2] = te_error_one
res[k, 3] = ta_error_des_new
res[k, 4] = ta_error_one_new
res[k, 5] = var1
res[k, 6] = var2
}
# Summaries over the 500 replications: per-column means and sds of res.
colMeans(res[1:500,])
unlist(lapply(data.frame(res[1:500,]), sd))
# Second experiment: same pipeline rerun from scratch for 500 replications.
# res columns: te_des, te_one, shifted-domain des/one errors, var1, var2.
res = matrix(nrow = 500, ncol = 6)
for(k in 1:500){
print(k)
# Latent vectors: 5 components with 2, 2, 7, 7, 7 elements, 5 dims each.
Z = list()
num = c(2, 2, 7, 7, 7)
for(i in 1:5){
zs = matrix(rnorm(num[i] * 5, 0, 1), nrow = num[i], ncol = 5)
Z = append(Z, list(zs))
}
# generate descriptors
# Depth-first enumeration of all 2*2*7*7*7 = 1372 combinations; each row is
# the concatenation of the chosen latent vectors (25 dims), accumulated in
# the global list `des`.
des <<- list()
f = function(x, i){
if(i == 6){
#print(x)
des <<- append(des, list(x))
return()
}
for(j in 1:num[i]){
xx = c(x, Z[[i]][j, ])
f(xx, i + 1)
}
}
f(c(), 1)
des = data.frame(des)
des = as.data.frame(t(as.matrix(des)))
rownames(des) = NULL
# generate onehot
# Same enumeration, but each component contributes a one-hot block instead
# of its latent vector (2+2+7+7+7 = 25 columns as well).
one <<- list()
f = function(x, i){
if(i == 6){
#print(x)
one <<- append(one, list(x))
return()
}
for(j in 1:num[i]){
t = rep(0, num[i])
t[j] = 1
xx = c(x, t)
f(xx, i + 1)
}
}
f(c(), 1)
one = data.frame(one)
one = as.data.frame(t(as.matrix(one)))
rownames(one) = NULL
# true model
# if only one or two components are having impacts
library(keras)
# Draw random (untrained) keras networks until the labeling they induce on
# `des` is not too imbalanced; 100 labels are then flipped as noise.
while(1){
model = keras_model_sequential()
model %>%
layer_dense(units = 16, activation = 'relu', input_shape = c(25), kernel_regularizer = regularizer_l1(0.1)) %>%
layer_dense(units = 16, activation = 'relu', kernel_regularizer = regularizer_l1(0.1)) %>%
layer_dense(units = 2, activation = 'softmax', kernel_regularizer = regularizer_l1(0.1))
y = model %>% predict_classes(as.matrix(des))
# Flip 100 random labels; seq_len(length(y)) instead of hard-coded 1:1372
# so the range always tracks nrow(des).
noise = sample(seq_len(length(y)), 100, replace = FALSE)
y[noise] = 1 - y[noise]
y = factor(y)
# Scalar condition: short-circuiting && rather than elementwise &.
if(sum(y == 1) > 100 && sum(y == 1) < 1270){
break
}
print("invalid")
}
# y
#
# Convert a factor of numeric labels back to numeric values.
unfactor = function(x){
return(as.numeric(as.character(x)))
}
# Keep column 1 plus columns 11-25 (components 3-5), as in the first run.
des = des[, c(1, seq(11, 25, 1))]
# one = one[, c(1, seq(6, 25, 1))]
library(randomForest)
library(rpart)
library(rpart.plot)
#forest_m1 = randomForest(y ~ x, data = dat_so)
# Random 200-row training sample shared by both encodings.
id_des = sample(seq(1, nrow(des)), 200)
train_des = data.frame(y[id_des], des[id_des, ])
colnames(train_des)[1] = "y"
#forest_m1 = randomForest(y ~ ., train_des, ntree = 100)
# Unlike the first run, no maxdepth cap: full rpart tree, 10-fold CV.
forest_m1 = rpart(y ~ ., data = train_des, control = rpart.control(xval = 10))
t = ((forest_m1)$cptable)
# var1 = number of splits in the final tree (last cptable row, col 2).
var1 = t[dim(t)[1], 2]
# var1 = forest_var(des, y)
train_one = data.frame(y[id_des], one[id_des, ])
colnames(train_one)[1] = "y"
#forest_m2 = randomForest(y ~ ., train_one, ntree = 100)
forest_m2 = rpart(y ~ ., data = train_one, control = rpart.control(xval = 10))
# rpart.plot(forest_m1)
t = ((forest_m2)$cptable)
var2 = t[dim(t)[1], 2]
# NOTE(review): cptable indexing always yields a numeric, so these
# is.null() guards appear to be dead code -- confirm.
if(is.null(var2)){
var2 = 0
}
if(is.null(var1)){
var1 = 0
}
# var2 = forest_var(one, y)
# Training/test misclassification rates (the ^2 on 0/1 logicals is a no-op).
ta_error_des = mean((unfactor(y[id_des]) != unfactor(predict(forest_m1, train_des, type = "class")))^2)
te_error_des = mean((unfactor(y[-id_des]) != unfactor(predict(forest_m1, des[-id_des, ], type = "class")))^2)
ta_error_one = mean((unfactor(y[id_des]) != unfactor(predict(forest_m2, train_one, type = "class")))^2)
te_error_one = mean((unfactor(y[-id_des]) != unfactor(predict(forest_m2, one[-id_des, ], type = "class")))^2)
# change of domain
# new domain resample the first two components
#Z = list()
#num = c(2, 2, 7, 7, 7)
# Resample components 1-2 from N(1, 1) to create the shifted domain.
for(i in 1:2){
zs = matrix(rnorm(num[i] * 5, 1, 1), nrow = num[i], ncol = 5)
Z[[i]] = zs
}
# generate descriptors
# Depth-first enumeration of the shifted-domain descriptor rows.
des_new <<- list()
f = function(x, i){
if(i == 6){
#print(x)
des_new <<- append(des_new, list(x))
return()
}
for(j in 1:num[i]){
xx = c(x, Z[[i]][j, ])
f(xx, i + 1)
}
}
f(c(), 1)
des_new = data.frame(des_new)
des_new = as.data.frame(t(as.matrix(des_new)))
rownames(des_new) = NULL
# generate onehot
# One-hot rows for the new domain; components 1-2 become all-zero blocks.
one_new <<- list()
f = function(x, i){
if(i == 6){
#print(x)
one_new <<- append(one_new, list(x))
return()
}
for(j in 1:num[i]){
t = rep(0, num[i])
if(i > 2){
t[j] = 1
}
xx = c(x, t)
f(xx, i + 1)
}
}
f(c(), 1)
one_new = data.frame(one_new)
one_new = as.data.frame(t(as.matrix(one_new)))
rownames(one_new) = NULL
# NOTE(review): y_new is predicted from one_new, while y came from des --
# confirm this asymmetry is intended.
y_new = model %>% predict_classes(as.matrix(one_new))
y_new = factor(y_new)
#var1 = forest_var(des, y, des_new, y_new)
#var2 = forest_var(one, y, one_new, y_new)
library(randomForest)
# Shifted-domain errors of the old-domain models.
ta_error_des_new = mean((unfactor(y_new[-id_des]) != unfactor(predict(forest_m1, des_new, type = "class")[-id_des]))^2)
ta_error_one_new = mean((unfactor(y_new[-id_des]) != unfactor(predict(forest_m2, one_new, type = "class")[-id_des]))^2)
# Per-replication results row.
res[k, 1] = te_error_des
res[k, 2] = te_error_one
res[k, 3] = ta_error_des_new
res[k, 4] = ta_error_one_new
res[k, 5] = var1
res[k, 6] = var2
}
# Summaries over the 500 replications: per-column means and sds of res.
colMeans(res[1:500,])
unlist(lapply(data.frame(res[1:500,]), sd))
# Author  :    Ziping Xu
# Email   :    zipingxu@umich.edu
# Date    :    May 17, 2021
# Record  :    Eunjae, correct set up.
# we have 5 components here and each component has 2, 2, 7, 7, 7 elements.
# dimensions all 5.
# sample the elements first
# write a function to compute model variance.
# Estimate the variability of a random forest's test error via bootstrap.
#   X, y         : full dataset (X rows of features, y factor labels)
#   testX, testY : optional external test set; when NULL, the last 201 rows
#                  of the shuffled data serve as the holdout
# Returns the standard deviation of the bootstrap test-error estimates.
forest_var = function(X, y, testX = NULL, testY = NULL){
# We will use bootstrap method to estimate variance.
library(boot)
# Generalized from a hard-coded 1372 so the function works for any nrow(X).
n = nrow(X)
# Shuffle once so the training rows (1:200) and holdout rows are random.
ids = sample(seq_len(n), n, replace = FALSE)
X = X[ids, ]
y = y[ids]
ho = seq(n - 200, n)  # holdout indices (1172:1372 when n = 1372)
# Statistic for boot(): out-of-sample error of a 100-tree forest fit on the
# bootstrap sample `indices` drawn from the first 200 shuffled rows.
err_rate <- function(formula, data, indices) {
d <- data[indices, ] # allows boot to select sample
if(is.null(testX)){
fit <- randomForest(formula, data = d, ntree = 100, xtest = X[ho, ], ytest = y[ho])
}
else{
fit <- randomForest(formula, data = d, ntree = 100, xtest = testX, ytest = testY)
}
# Test error after the final (100th) tree.
return(fit$test$err.rate[100, 1])
}
# Bootstrapping with 100 replications (R = 100) over the first 200 rows.
results <- boot(data = data.frame(y, X)[1:200, ], statistic = err_rate,
R = 100, formula = y ~ .)
# Spread of the bootstrap error estimates.
return(sd(results$t))
}
# Third experiment: identical replication pipeline, rerun for 500 draws.
res = matrix(nrow = 500, ncol = 6)
for(k in 1:500){
print(k)
# Latent vectors: 5 components with 2, 2, 7, 7, 7 elements, 5 dims each.
Z = list()
num = c(2, 2, 7, 7, 7)
for(i in 1:5){
zs = matrix(rnorm(num[i] * 5, 0, 1), nrow = num[i], ncol = 5)
Z = append(Z, list(zs))
}
# generate descriptors
# Depth-first enumeration of all 1372 combinations into the global `des`.
des <<- list()
f = function(x, i){
if(i == 6){
#print(x)
des <<- append(des, list(x))
return()
}
for(j in 1:num[i]){
xx = c(x, Z[[i]][j, ])
f(xx, i + 1)
}
}
f(c(), 1)
des = data.frame(des)
des = as.data.frame(t(as.matrix(des)))
rownames(des) = NULL
# generate onehot
# Same enumeration with one-hot blocks per component (25 columns total).
one <<- list()
f = function(x, i){
if(i == 6){
#print(x)
one <<- append(one, list(x))
return()
}
for(j in 1:num[i]){
t = rep(0, num[i])
t[j] = 1
xx = c(x, t)
f(xx, i + 1)
}
}
f(c(), 1)
one = data.frame(one)
one = as.data.frame(t(as.matrix(one)))
rownames(one) = NULL
# true model
# if only one or two components are having impacts
library(keras)
# Draw random (untrained) keras networks until the labeling they induce on
# `des` is not too imbalanced; 100 labels are then flipped as noise.
while(1){
model = keras_model_sequential()
model %>%
layer_dense(units = 16, activation = 'relu', input_shape = c(25), kernel_regularizer = regularizer_l1(0.1)) %>%
layer_dense(units = 16, activation = 'relu', kernel_regularizer = regularizer_l1(0.1)) %>%
layer_dense(units = 2, activation = 'softmax', kernel_regularizer = regularizer_l1(0.1))
y = model %>% predict_classes(as.matrix(des))
# Flip 100 random labels; seq_len(length(y)) instead of hard-coded 1:1372
# so the range always tracks nrow(des).
noise = sample(seq_len(length(y)), 100, replace = FALSE)
y[noise] = 1 - y[noise]
y = factor(y)
# Scalar condition: short-circuiting && rather than elementwise &.
if(sum(y == 1) > 100 && sum(y == 1) < 1270){
break
}
print("invalid")
}
# y
#
# Convert a factor of numeric labels back to numeric values.
unfactor = function(x){
return(as.numeric(as.character(x)))
}
# Keep column 1 plus columns 11-25 (components 3-5), as in the earlier runs.
des = des[, c(1, seq(11, 25, 1))]
# one = one[, c(1, seq(6, 25, 1))]
library(randomForest)
library(rpart)
library(rpart.plot)
#forest_m1 = randomForest(y ~ x, data = dat_so)
# Random 200-row training sample shared by both encodings.
id_des = sample(seq(1, nrow(des)), 200)
train_des = data.frame(y[id_des], des[id_des, ])
colnames(train_des)[1] = "y"
#forest_m1 = randomForest(y ~ ., train_des, ntree = 100)
# Full rpart tree (no maxdepth cap), 10-fold CV for the cptable.
forest_m1 = rpart(y ~ ., data = train_des, control = rpart.control(xval = 10))
t = ((forest_m1)$cptable)
# var1 = number of splits in the final tree (last cptable row, col 2).
var1 = t[dim(t)[1], 2]
# var1 = forest_var(des, y)
train_one = data.frame(y[id_des], one[id_des, ])
colnames(train_one)[1] = "y"
#forest_m2 = randomForest(y ~ ., train_one, ntree = 100)
forest_m2 = rpart(y ~ ., data = train_one, control = rpart.control(xval = 10))
# rpart.plot(forest_m1)
t = ((forest_m2)$cptable)
var2 = t[dim(t)[1], 2]
# NOTE(review): cptable indexing always yields a numeric, so these
# is.null() guards appear to be dead code -- confirm.
if(is.null(var2)){
var2 = 0
}
if(is.null(var1)){
var1 = 0
}
# var2 = forest_var(one, y)
# Training/test misclassification rates (the ^2 on 0/1 logicals is a no-op).
ta_error_des = mean((unfactor(y[id_des]) != unfactor(predict(forest_m1, train_des, type = "class")))^2)
te_error_des = mean((unfactor(y[-id_des]) != unfactor(predict(forest_m1, des[-id_des, ], type = "class")))^2)
ta_error_one = mean((unfactor(y[id_des]) != unfactor(predict(forest_m2, train_one, type = "class")))^2)
te_error_one = mean((unfactor(y[-id_des]) != unfactor(predict(forest_m2, one[-id_des, ], type = "class")))^2)
# change of domain
# new domain resample the first two components
#Z = list()
#num = c(2, 2, 7, 7, 7)
# Resample components 1-2 from N(1, 1) to create the shifted domain.
for(i in 1:2){
zs = matrix(rnorm(num[i] * 5, 1, 1), nrow = num[i], ncol = 5)
Z[[i]] = zs
}
# generate descriptors
# Depth-first enumeration of the shifted-domain descriptor rows.
des_new <<- list()
f = function(x, i){
if(i == 6){
#print(x)
des_new <<- append(des_new, list(x))
return()
}
for(j in 1:num[i]){
xx = c(x, Z[[i]][j, ])
f(xx, i + 1)
}
}
f(c(), 1)
des_new = data.frame(des_new)
des_new = as.data.frame(t(as.matrix(des_new)))
rownames(des_new) = NULL
# generate onehot
# One-hot rows for the new domain; components 1-2 become all-zero blocks.
one_new <<- list()
f = function(x, i){
if(i == 6){
#print(x)
one_new <<- append(one_new, list(x))
return()
}
for(j in 1:num[i]){
t = rep(0, num[i])
if(i > 2){
t[j] = 1
}
xx = c(x, t)
f(xx, i + 1)
}
}
f(c(), 1)
one_new = data.frame(one_new)
one_new = as.data.frame(t(as.matrix(one_new)))
rownames(one_new) = NULL
# NOTE(review): y_new is predicted from one_new, while y came from des --
# confirm this asymmetry is intended.
y_new = model %>% predict_classes(as.matrix(one_new))
y_new = factor(y_new)
#var1 = forest_var(des, y, des_new, y_new)
#var2 = forest_var(one, y, one_new, y_new)
library(randomForest)
# Shifted-domain errors of the old-domain models.
ta_error_des_new = mean((unfactor(y_new[-id_des]) != unfactor(predict(forest_m1, des_new, type = "class")[-id_des]))^2)
ta_error_one_new = mean((unfactor(y_new[-id_des]) != unfactor(predict(forest_m2, one_new, type = "class")[-id_des]))^2)
# Per-replication results row.
res[k, 1] = te_error_des
res[k, 2] = te_error_one
res[k, 3] = ta_error_des_new
res[k, 4] = ta_error_one_new
res[k, 5] = var1
res[k, 6] = var2
}
# Summaries over the 500 replications: per-column means and sds of res.
colMeans(res[1:500,])
unlist(lapply(data.frame(res[1:500,]), sd))
# Scratch console arithmetic left over from interactive exploration;
# no downstream code depends on these values.
1 - 0.8 * log(2)
(1 - 0.8 * log(2))/100
1/256
1/0.8
log(2)
sqrt(1/0.8)/2
sqrt(log(2))
# Manually transcribed results: means (m) and sds (s) for a 4 (source size,
# rows) x 3 (target size, columns) grid of runs; the last "source" row is
# the baseline.
m = c(0.08627224, 0.06196753, 0.01598117,
0.04297673, 0.0332142 , 0.01200712,
0.03939674, 0.02368374, 0.0050971 ,
0.05380833, 0.04346954, 0.01744827)
s = c(0.44229143, 0.30327834, 0.0931952 ,
0.17930563, 0.14251298, 0.06231836,
0.1419799 , 0.08867034, 0.01429667,
0.22622143, 0.21046919, 0.0880834)
# Reshape the flat vectors to 4 x 3 (fill by row via transpose).
m = t(matrix(m, 3, 4))
s = t(matrix(s, 3, 4))
m = data.frame(m)
s = data.frame(s)
# Columns = target sizes 10/100/1000; Source = 100/1000/10000 plus 0 for
# the baseline row.
colnames(m) = 10^seq(1, 3, 1)
colnames(s) = 10^seq(1, 3, 1)
m$Source = c(10^seq(2, 4, 1), 0)
s$Source = c(10^seq(2, 4, 1), 0)
library(reshape2)
# Long format: one row per (Source, Target) cell; attach sds to the means.
mm = melt(m, id.vars = "Source", value.name = "Mean")
ss = melt(s, id.vars = "Source", value.name = "Std")
mm$Std = ss$Std
dat = mm
colnames(dat)[2] = "Target"
dat$Source[which(dat$Source == 0)] = "Baseline"
dat$Source = factor(dat$Source, levels = c(100, 1000, 10000, "Baseline"), ordered = T)
dat$Target = factor(dat$Target)
library(ggplot2)
# Mean +/- standard error (sd / sqrt(100) for 100 replications).
g = ggplot(dat) + geom_point(aes(x = Source, y = Mean, color = Target)) +
geom_line(aes(x = Source, y = Mean, group = Target, color = Target)) +
geom_errorbar(aes(x = Source, ymin=Mean-Std/sqrt(100), ymax=Mean + Std/sqrt(100), color = Target), width=.1)+
theme_bw() + ylab("Mean Square Error")
g
# NOTE(review): this first ggsave writes to the current working directory,
# BEFORE the setwd below -- likely an unintended duplicate; confirm which
# copy is wanted.
ggsave("Simulation_n_effects.pdf", width = 6, height = 3)
# NOTE(review): setwd("~/") is immediately overridden, and the absolute
# path below is machine-specific (non-portable).
setwd("~/")
setwd("/Users/zipingxu/Desktop/Research/Representation_learning/Code_Supplementary/Code/figures")
ggsave("Simulation_n_effects.pdf", width = 6, height = 3)
