# Set-up for the A_1 term: split X and Y into completely and incompletely
# observed rows and compute the normalising constants.
# X, Y (matrices, one sample per row) and beta must already exist in the
# workspace; none of them is created here.
n <- dim(X)[1]
m <- dim(Y)[1]
# drop = FALSE keeps a one-row selection as a matrix. Without it a single
# matching row collapses to a vector, dim() returns NULL and the if()
# condition below becomes zero-length and errors.
Complete_X <- X[!(rowSums(is.na(X)) > 0), , drop = FALSE] ### samples in X completely observed
Complete_Y <- Y[!(rowSums(is.na(Y)) > 0), , drop = FALSE]
Incomplete_X <- X[rowSums(is.na(X)) > 0, , drop = FALSE] ### samples in X incompletely observed
Incomplete_Y <- Y[rowSums(is.na(Y)) > 0, , drop = FALSE]
n_prime <- dim(Complete_X)[1]
m_prime <- dim(Complete_Y)[1]
# c_1 and c_2 are defined but not used by any statement in this fragment.
c_1 <- 2/(n*(n-1))
c_2 <- 2/(m*(m-1))
c_3 <- 2/(n*m)
### compute term one
A_1_1 <- 0
A_1_2 <- 0
# When either sample has no incomplete rows the cross term is set to zero;
# `||` is the scalar operator appropriate for an if() condition.
if ((n == n_prime) || (m == m_prime)) {
  A_1_3 <- 0
} else {
  A_1_3 <- c_3*compute_A_1_3(Incomplete_X, Incomplete_Y,beta)
}
# NOTE(review): scratch line copied from the loop body of compute_A_1_3;
# `k` is not assigned anywhere in the visible code -- confirm it is set
# interactively before running this line.
D_XY_k <- abs(outer(Incomplete_X[, k], Incomplete_Y[, k], "-"))
# Sum of exp(-beta * D) over every (row of X, row of Y) pair, where D is the
# sum over coordinates of |x_k - y_k|. Coordinates where either value is NA
# contribute 0 to D.
#
# Arguments:
#   Incomplete_X  numeric matrix (one sample per row) or a plain vector,
#                 which is treated as a single row.
#   Incomplete_Y  same layout as Incomplete_X; its columns are matched to
#                 those of Incomplete_X by position.
#   beta          numeric scalar multiplying the distance inside exp().
# Returns:
#   A single numeric value: sum(exp(-beta * D)) over all row pairs.
compute_A_1_3 <- function(Incomplete_X, Incomplete_Y,beta){
  # A single sample extracted as X[i, ] has no dim attribute; reshape it to a
  # 1-row matrix so the column indexing below works.
  if (is.null(dim(Incomplete_X))) {
    Incomplete_X <- matrix(Incomplete_X, nrow = 1)
  }
  if (is.null(dim(Incomplete_Y))) {
    Incomplete_Y <- matrix(Incomplete_Y, nrow = 1)
  }
  n <- nrow(Incomplete_X)
  m <- nrow(Incomplete_Y)
  d <- ncol(Incomplete_X)
  D_XY <- matrix(0, n, m)
  # seq_len() yields an empty loop for zero columns, whereas 1:d would
  # iterate over c(1, 0) and index a non-existent column.
  for (k in seq_len(d)) {
    D_XY_k <- abs(outer(Incomplete_X[, k], Incomplete_Y[, k], "-"))
    # A difference involving a missing coordinate is NA; count it as 0.
    D_XY_k[is.na(D_XY_k)] <- 0
    D_XY <- D_XY + D_XY_k
  }
  sum(exp(-beta * D_XY))
}
# Computes A_1 from X, Y and beta already present in the workspace, then
# builds a synthetic data set and finally resets the session.
n <- dim(X)[1]
m <- dim(Y)[1]
# drop = FALSE keeps a one-row selection as a matrix. Without it a single
# matching row collapses to a vector, dim() returns NULL and the if()
# condition below becomes zero-length and errors.
Complete_X <- X[!(rowSums(is.na(X)) > 0), , drop = FALSE] ### samples in X completely observed
Complete_Y <- Y[!(rowSums(is.na(Y)) > 0), , drop = FALSE]
Incomplete_X <- X[rowSums(is.na(X)) > 0, , drop = FALSE] ### samples in X incompletely observed
Incomplete_Y <- Y[rowSums(is.na(Y)) > 0, , drop = FALSE]
n_prime <- dim(Complete_X)[1]
m_prime <- dim(Complete_Y)[1]
# c_1 and c_2 are defined but not used by any statement in this fragment.
c_1 <- 2/(n*(n-1))
c_2 <- 2/(m*(m-1))
c_3 <- 2/(n*m)
### compute term one
A_1_1 <- 0
A_1_2 <- 0
# When either sample has no incomplete rows the cross term is set to zero;
# `||` is the scalar operator appropriate for an if() condition.
if ((n == n_prime) || (m == m_prime)) {
  A_1_3 <- 0
} else {
  A_1_3 <- c_3*compute_A_1_3(Incomplete_X, Incomplete_Y,beta)
}
A_1 <- A_1_1 + A_1_2 - A_1_3
# Lower_Bound_MMD is not defined in the visible code; it is called here on
# the incompletely observed rows only.
Lower_Bound_MMD(Incomplete_X, Incomplete_Y, beta)
Incomplete_X
# Synthetic data: n and m draws from d-dimensional normals with zero mean
# and identity covariance.
d <- 10
n <- 100
m <- 100
mu_1 <- rep(0,d)
sigma_1 <- diag(d)
X <- MASS::mvrnorm(n, mu_1, sigma_1)
mu_2 <- rep(0,d)
sigma_2 <- diag(d)
# Y uses its own sample size m.
Y <- MASS::mvrnorm(m, mu_2, sigma_2)
S <- 0.05
s <- 0.2
# From here on Incomplete_X / Incomplete_Y hold the full data sets with NAs
# injected, not just the incomplete rows selected above.
Incomplete_X <- MCAR_Multivariate(X,S,s)
Incomplete_Y <- MCAR_Multivariate(Y,S,s)
# rowSums() is NA for any row containing an NA, so only fully observed rows
# are passed to MedianHeuristic.
beta <- MedianHeuristic(Incomplete_X[!is.na(rowSums(Incomplete_X)),],Incomplete_Y[!is.na(rowSums(Incomplete_Y)),])
############## Compute $p$-value of proposed method in the presence of missing data.
# NOTE(review): rm(list = ls()) wipes every object created above, including
# any function typed at the console.
rm(list = ls())
set.seed(0)
source('MMD using permutation with Missing data.R')
############## Compute $p$-value of proposed method in the presence of missing data.
rm(list = ls())
set.seed(0)
source('MMD using permutation with Missing data.R')
## case 4:  not all samples are completely observed
# Given a matrix X (one sample per row), return a copy in which a proportion
# S of the rows is incompletely observed, each of those rows having a
# proportion s of its dimensions set to NA.
#
# The set of missing dimensions is drawn once and shared by every incomplete
# row. Rows are drawn before dimensions, so results for a given seed match
# sample(1:n, S*n) followed by sample(1:d, s*d).
#
# Arguments:
#   X  matrix with n rows and d columns.
#   S  proportion of rows that receive missing values.
#   s  proportion of columns set to NA in those rows.
# Returns:
#   X with NA written at the selected (row, column) positions.
MCAR_Multivariate <- function(X,S,s){
  n <- dim(X)[1]
  d <- dim(X)[2]
  # S * n can land just below the intended integer in floating point
  # (0.29 * 100 is 28.999...), and sample() truncates its size argument.
  # A small tolerance before flooring restores the intended count.
  tol <- sqrt(.Machine$double.eps)
  n_missing_rows <- floor(S * n + tol)
  n_missing_dims <- floor(s * d + tol)
  # sample.int() also behaves for n == 0 or d == 0, where 1:n would be c(1, 0).
  missing_location <- sample.int(n, n_missing_rows)
  missing_dims <- sample.int(d, n_missing_dims)
  X[missing_location, missing_dims] <- NA
  X
}
# Multivariate run with missing data (d = 10), then a fully observed d = 50
# set-up, a session reset, and the univariate fully observed case.
d <- 10
n <- 100
m <- 100
mu_1 <- rep(0,d)
sigma_1 <- diag(d)
X <- MASS::mvrnorm(n, mu_1, sigma_1)
mu_2 <- rep(0,d)
sigma_2 <- diag(d)
# Y uses its own sample size m.
Y <- MASS::mvrnorm(m, mu_2, sigma_2)
S <- 0.05
s <- 0.2
Incomplete_X <- MCAR_Multivariate(X,S,s)
Incomplete_Y <- MCAR_Multivariate(Y,S,s)
# rowSums() is NA for any row containing an NA, so only fully observed rows
# are passed to MedianHeuristic.
beta <- MedianHeuristic(Incomplete_X[!is.na(rowSums(Incomplete_X)),],Incomplete_Y[!is.na(rowSums(Incomplete_Y)),])
# NOTE(review): the result is stored as res_case_3 although the data contain
# missing values (labelled "case 4" elsewhere in this file).
res_case_3 <- permutation_testing_with_missing_data(Incomplete_X, Incomplete_Y, beta, perm = 100)
res_case_3$stat
res_case_3$pval
Upper_Bound_MMD(Incomplete_X, Incomplete_Y,beta)
## case 5:  samples are all observed
d <- 10
## case 5:  samples are all observed
# d is immediately overwritten with 50.
d <- 50
n <- 100
m <- 100
mu_1 <- rep(0,d)
sigma_1 <- diag(d)
X <- MASS::mvrnorm(n, mu_1, sigma_1)
mu_2 <- rep(0,d)
sigma_2 <- diag(d)
Y <- MASS::mvrnorm(m, mu_2, sigma_2)
# This beta is discarded by the rm(list = ls()) below before it is used.
beta <- MedianHeuristic(X,Y)
############## Compute $p$-value of proposed method in the presence of missing data.
# NOTE(review): rm(list = ls()) wipes every object created above, including
# any function typed at the console.
rm(list = ls())
set.seed(0)
source('MMD using permutation with Missing data.R')
# NOTE(review): this path has no ".R" extension, unlike the call three lines
# below -- confirm such a file exists, otherwise this source() fails.
source('MMD using CLT with Missing Data')
## case 1: samples are all observed
n <- 100
source('MMD using CLT with Missing Data.R')
## case 1: samples are all observed
n <- 100
m <- 100
X <- rnorm(n, 0, 1)
Y <- rnorm(m, 0, 1)
beta <- MedianHeuristic(X,Y)
res_case_1 <- permutation_testing_with_missing_data(X,Y,beta,perm = 200)
res_case_1$stat
res_case_1$pval
## case 2: samples with missing data
# Given a vector X, return a copy in which a proportion s of the entries,
# chosen at random, is replaced by NA.
#
# Arguments:
#   X  vector of observations.
#   s  proportion of entries to set to NA.
# Returns:
#   X with NA at the sampled positions.
MCAR_univariate <- function(X,s){
  n <- length(X)
  # s * n can land just below the intended integer in floating point
  # (0.29 * 100 is 28.999...), and sample() truncates its size argument.
  # A small tolerance before flooring restores the intended count.
  n_missing <- floor(s * n + sqrt(.Machine$double.eps))
  # sample.int() draws the same indices as sample(1:n, .) for n >= 1 and also
  # behaves for n == 0, where 1:n would be c(1, 0).
  missing_location <- sample.int(n, n_missing)
  X[missing_location] <- NA
  X
}
# Case 2 run: univariate samples with 5% of entries missing; Y has mean 1.
n <- 100
m <- 100
X <- rnorm(n, 0, 1)
Y <- rnorm(m, 1, 1)
s <- 0.05
Incomplete_X <- MCAR_univariate(X, s)
Incomplete_Y <- MCAR_univariate(Y, s)
# beta is computed from the observed entries only.
beta <- MedianHeuristic(Incomplete_X[!is.na(Incomplete_X)], Incomplete_Y[!is.na(Incomplete_Y)])
res_case_2 <- permutation_testing_with_missing_data(Incomplete_X,Incomplete_Y,beta,perm = 100)
res_case_2$stat
res_case_2$pval
## case 3:  samples are all observed
# Multivariate (d = 10), both samples zero-mean with identity covariance.
d <- 10
n <- 100
m <- 100
mu_1 <- rep(0,d)
sigma_1 <- diag(d)
X <- MASS::mvrnorm(n, mu_1, sigma_1)
mu_2 <- rep(0,d)
sigma_2 <- diag(d)
# Y uses its own sample size m.
Y <- MASS::mvrnorm(m, mu_2, sigma_2)
beta <- MedianHeuristic(X,Y)
res_case_3 <- permutation_testing_with_missing_data(X, Y, beta, perm = 100)
res_case_3$stat
res_case_3$pval
## case 4:  not all samples are completely observed
# Given a matrix X (one sample per row), return a copy in which a proportion
# S of the rows is incompletely observed, each of those rows having a
# proportion s of its dimensions set to NA.
#
# The set of missing dimensions is drawn once and shared by every incomplete
# row. Rows are drawn before dimensions, so results for a given seed match
# sample(1:n, S*n) followed by sample(1:d, s*d).
#
# Arguments:
#   X  matrix with n rows and d columns.
#   S  proportion of rows that receive missing values.
#   s  proportion of columns set to NA in those rows.
# Returns:
#   X with NA written at the selected (row, column) positions.
MCAR_Multivariate <- function(X,S,s){
  n <- dim(X)[1]
  d <- dim(X)[2]
  # S * n can land just below the intended integer in floating point
  # (0.29 * 100 is 28.999...), and sample() truncates its size argument.
  # A small tolerance before flooring restores the intended count.
  tol <- sqrt(.Machine$double.eps)
  n_missing_rows <- floor(S * n + tol)
  n_missing_dims <- floor(s * d + tol)
  # sample.int() also behaves for n == 0 or d == 0, where 1:n would be c(1, 0).
  missing_location <- sample.int(n, n_missing_rows)
  missing_dims <- sample.int(d, n_missing_dims)
  X[missing_location, missing_dims] <- NA
  X
}
# Case 4 run (d = 10, missing data, permutation test), case 5 run (d = 50,
# fully observed, CLT-based test), some intermediate quantities, a session
# reset, and the univariate fully observed case.
d <- 10
n <- 100
m <- 100
mu_1 <- rep(0,d)
sigma_1 <- diag(d)
X <- MASS::mvrnorm(n, mu_1, sigma_1)
mu_2 <- rep(0,d)
sigma_2 <- diag(d)
# Y uses its own sample size m.
Y <- MASS::mvrnorm(m, mu_2, sigma_2)
S <- 0.05
s <- 0.2
Incomplete_X <- MCAR_Multivariate(X,S,s)
Incomplete_Y <- MCAR_Multivariate(Y,S,s)
# rowSums() is NA for any row containing an NA, so only fully observed rows
# are passed to MedianHeuristic.
beta <- MedianHeuristic(Incomplete_X[!is.na(rowSums(Incomplete_X)),],Incomplete_Y[!is.na(rowSums(Incomplete_Y)),])
res_case_4 <- permutation_testing_with_missing_data(Incomplete_X, Incomplete_Y, beta, perm = 100)
res_case_4$stat
res_case_4$pval
## case 5:  samples are all observed
d <- 50
n <- 100
m <- 100
mu_1 <- rep(0,d)
sigma_1 <- diag(d)
X <- MASS::mvrnorm(n, mu_1, sigma_1)
mu_2 <- rep(0,d)
sigma_2 <- diag(d)
Y <- MASS::mvrnorm(m, mu_2, sigma_2)
beta <- MedianHeuristic(X,Y)
# NOTE(review): stored as res_case_3 although this is labelled case 5; it
# overwrites any earlier res_case_3 in the workspace.
res_case_3 <- testing_with_missing_using_CLT(X, Y, beta)
res_case_3$stat
res_case_3$pval
# compute the upper bounds of variance
bounds_matrix <- compute_kernel_matrix_min_max(X,Y,beta)
kernel_matrix_min <- bounds_matrix$kernel_matrix_min
kernel_matrix_max <- bounds_matrix$kernel_matrix_max
max_var <- var_max_estimator(kernel_matrix_min, kernel_matrix_max)
# compute lower bounds of MMD
n <- dim(X)[1]
m <- dim(Y)[1]
# c_n_m is defined but not used by any later statement in this fragment.
c_n_m <- 2/(n*(n-1)) + 4/(n*m) + 2/(m*(m-1))
res_case_3 <- testing_with_missing_using_CLT(X, Y, beta)
res_case_3$stat
res_case_3$pval
############## Compute $p$-value of proposed method in the presence of missing data.
# NOTE(review): rm(list = ls()) wipes every object created above, including
# any function typed at the console.
rm(list = ls())
set.seed(0)
source('MMD using permutation with Missing data.R')
source('MMD using CLT with Missing Data.R')
## case 1: samples are all observed
n <- 100
m <- 100
X <- rnorm(n, 0, 1)
Y <- rnorm(m, 0, 1)
beta <- MedianHeuristic(X,Y)
res_case_1 <- permutation_testing_with_missing_data(X,Y,beta,perm = 200)
res_case_1$stat
res_case_1$pval
## case 2: samples with missing data
# Given a vector X, return a copy in which a proportion s of the entries,
# chosen at random, is replaced by NA.
#
# Arguments:
#   X  vector of observations.
#   s  proportion of entries to set to NA.
# Returns:
#   X with NA at the sampled positions.
MCAR_univariate <- function(X,s){
  n <- length(X)
  # s * n can land just below the intended integer in floating point
  # (0.29 * 100 is 28.999...), and sample() truncates its size argument.
  # A small tolerance before flooring restores the intended count.
  n_missing <- floor(s * n + sqrt(.Machine$double.eps))
  # sample.int() draws the same indices as sample(1:n, .) for n >= 1 and also
  # behaves for n == 0, where 1:n would be c(1, 0).
  missing_location <- sample.int(n, n_missing)
  X[missing_location] <- NA
  X
}
# Case 2 run: univariate samples with 5% of entries missing; Y has mean 1.
n <- 100
m <- 100
X <- rnorm(n, 0, 1)
Y <- rnorm(m, 1, 1)
s <- 0.05
Incomplete_X <- MCAR_univariate(X, s)
Incomplete_Y <- MCAR_univariate(Y, s)
# beta is computed from the observed entries only.
beta <- MedianHeuristic(Incomplete_X[!is.na(Incomplete_X)], Incomplete_Y[!is.na(Incomplete_Y)])
res_case_2 <- permutation_testing_with_missing_data(Incomplete_X,Incomplete_Y,beta,perm = 100)
res_case_2$stat
res_case_2$pval
## case 3:  samples are all observed
# Multivariate (d = 10), both samples zero-mean with identity covariance.
d <- 10
n <- 100
m <- 100
mu_1 <- rep(0,d)
sigma_1 <- diag(d)
X <- MASS::mvrnorm(n, mu_1, sigma_1)
mu_2 <- rep(0,d)
sigma_2 <- diag(d)
# Y uses its own sample size m.
Y <- MASS::mvrnorm(m, mu_2, sigma_2)
beta <- MedianHeuristic(X,Y)
res_case_3 <- permutation_testing_with_missing_data(X, Y, beta, perm = 100)
res_case_3$stat
res_case_3$pval
## case 4:  not all samples are completely observed
# Given a matrix X (one sample per row), return a copy in which a proportion
# S of the rows is incompletely observed, each of those rows having a
# proportion s of its dimensions set to NA.
#
# The set of missing dimensions is drawn once and shared by every incomplete
# row. Rows are drawn before dimensions, so results for a given seed match
# sample(1:n, S*n) followed by sample(1:d, s*d).
#
# Arguments:
#   X  matrix with n rows and d columns.
#   S  proportion of rows that receive missing values.
#   s  proportion of columns set to NA in those rows.
# Returns:
#   X with NA written at the selected (row, column) positions.
MCAR_Multivariate <- function(X,S,s){
  n <- dim(X)[1]
  d <- dim(X)[2]
  # S * n can land just below the intended integer in floating point
  # (0.29 * 100 is 28.999...), and sample() truncates its size argument.
  # A small tolerance before flooring restores the intended count.
  tol <- sqrt(.Machine$double.eps)
  n_missing_rows <- floor(S * n + tol)
  n_missing_dims <- floor(s * d + tol)
  # sample.int() also behaves for n == 0 or d == 0, where 1:n would be c(1, 0).
  missing_location <- sample.int(n, n_missing_rows)
  missing_dims <- sample.int(d, n_missing_dims)
  X[missing_location, missing_dims] <- NA
  X
}
# Case 4 run (d = 10, missing data, permutation test), case 5 run (d = 50,
# fully observed, CLT-based test), a d = 50 run with missing data and a
# shifted mean for Y, then a session reset.
d <- 10
n <- 100
m <- 100
mu_1 <- rep(0,d)
sigma_1 <- diag(d)
X <- MASS::mvrnorm(n, mu_1, sigma_1)
mu_2 <- rep(0,d)
sigma_2 <- diag(d)
# Y uses its own sample size m.
Y <- MASS::mvrnorm(m, mu_2, sigma_2)
S <- 0.05
s <- 0.2
Incomplete_X <- MCAR_Multivariate(X,S,s)
Incomplete_Y <- MCAR_Multivariate(Y,S,s)
# rowSums() is NA for any row containing an NA, so only fully observed rows
# are passed to MedianHeuristic.
beta <- MedianHeuristic(Incomplete_X[!is.na(rowSums(Incomplete_X)),],Incomplete_Y[!is.na(rowSums(Incomplete_Y)),])
res_case_4 <- permutation_testing_with_missing_data(Incomplete_X, Incomplete_Y, beta, perm = 100)
res_case_4$stat
res_case_4$pval
## case 5:  samples are all observed
d <- 50
n <- 100
m <- 100
mu_1 <- rep(0,d)
sigma_1 <- diag(d)
X <- MASS::mvrnorm(n, mu_1, sigma_1)
mu_2 <- rep(0,d)
sigma_2 <- diag(d)
Y <- MASS::mvrnorm(m, mu_2, sigma_2)
beta <- MedianHeuristic(X,Y)
res_case_5 <- testing_with_missing_using_CLT(X, Y, beta)
res_case_5$stat
res_case_5$pval
# Unlabelled run stored as res_case_6: d = 50, Y mean shifted to 1 in every
# coordinate, missing data injected into both samples.
d <- 50
n <- 100
m <- 100
mu_1 <- rep(0,d)
sigma_1 <- diag(d)
X <- MASS::mvrnorm(n, mu_1, sigma_1)
mu_2 <- rep(1,d)
sigma_2 <- diag(d)
Y <- MASS::mvrnorm(m, mu_2, sigma_2)
S <- 0.05
s <- 0.2
Incomplete_X <- MCAR_Multivariate(X,S,s)
Incomplete_Y <- MCAR_Multivariate(Y,S,s)
beta <- MedianHeuristic(Incomplete_X[!is.na(rowSums(Incomplete_X)),],Incomplete_Y[!is.na(rowSums(Incomplete_Y)),])
res_case_6 <- testing_with_missing_using_CLT(Incomplete_X, Incomplete_Y, beta)
res_case_6$stat
res_case_6$pval
############## Compute $p$-value of proposed method in the presence of missing data.
# NOTE(review): rm(list = ls()) wipes every object created above, including
# any function typed at the console.
rm(list = ls())
set.seed(0)
source('MMD using permutation with Missing data.R')
source('MMD using CLT with Missing Data.R')
## case 4:  not all samples are completely observed
# Given a matrix X (one sample per row), return a copy in which a proportion
# S of the rows is incompletely observed, each of those rows having a
# proportion s of its dimensions set to NA.
#
# The set of missing dimensions is drawn once and shared by every incomplete
# row. Rows are drawn before dimensions, so results for a given seed match
# sample(1:n, S*n) followed by sample(1:d, s*d).
#
# Arguments:
#   X  matrix with n rows and d columns.
#   S  proportion of rows that receive missing values.
#   s  proportion of columns set to NA in those rows.
# Returns:
#   X with NA written at the selected (row, column) positions.
MCAR_Multivariate <- function(X,S,s){
  n <- dim(X)[1]
  d <- dim(X)[2]
  # S * n can land just below the intended integer in floating point
  # (0.29 * 100 is 28.999...), and sample() truncates its size argument.
  # A small tolerance before flooring restores the intended count.
  tol <- sqrt(.Machine$double.eps)
  n_missing_rows <- floor(S * n + tol)
  n_missing_dims <- floor(s * d + tol)
  # sample.int() also behaves for n == 0 or d == 0, where 1:n would be c(1, 0).
  missing_location <- sample.int(n, n_missing_rows)
  missing_dims <- sample.int(d, n_missing_dims)
  X[missing_location, missing_dims] <- NA
  X
}
# Three d = 50 runs of the CLT-based test with missing data, with Y's mean
# set to 0, 0.5 and 0.8 in every coordinate. Each run overwrites res_case_6.
# Followed by session resets and the univariate fully observed case.
d <- 50
n <- 100
m <- 100
mu_1 <- rep(0,d)
sigma_1 <- diag(d)
X <- MASS::mvrnorm(n, mu_1, sigma_1)
mu_2 <- rep(0,d)
sigma_2 <- diag(d)
# Y uses its own sample size m.
Y <- MASS::mvrnorm(m, mu_2, sigma_2)
S <- 0.05
s <- 0.2
Incomplete_X <- MCAR_Multivariate(X,S,s)
Incomplete_Y <- MCAR_Multivariate(Y,S,s)
# rowSums() is NA for any row containing an NA, so only fully observed rows
# are passed to MedianHeuristic.
beta <- MedianHeuristic(Incomplete_X[!is.na(rowSums(Incomplete_X)),],Incomplete_Y[!is.na(rowSums(Incomplete_Y)),])
beta
res_case_6 <- testing_with_missing_using_CLT(Incomplete_X, Incomplete_Y, beta)
res_case_6$stat
res_case_6$pval
# Same set-up with Y's mean shifted to 0.5.
d <- 50
n <- 100
m <- 100
mu_1 <- rep(0,d)
sigma_1 <- diag(d)
X <- MASS::mvrnorm(n, mu_1, sigma_1)
mu_2 <- rep(0.5,d)
sigma_2 <- diag(d)
Y <- MASS::mvrnorm(m, mu_2, sigma_2)
S <- 0.05
s <- 0.2
Incomplete_X <- MCAR_Multivariate(X,S,s)
Incomplete_Y <- MCAR_Multivariate(Y,S,s)
beta <- MedianHeuristic(Incomplete_X[!is.na(rowSums(Incomplete_X)),],Incomplete_Y[!is.na(rowSums(Incomplete_Y)),])
res_case_6 <- testing_with_missing_using_CLT(Incomplete_X, Incomplete_Y, beta)
res_case_6$stat
res_case_6$pval
# Same set-up with Y's mean shifted to 0.8.
d <- 50
n <- 100
m <- 100
mu_1 <- rep(0,d)
sigma_1 <- diag(d)
X <- MASS::mvrnorm(n, mu_1, sigma_1)
mu_2 <- rep(0.8,d)
sigma_2 <- diag(d)
Y <- MASS::mvrnorm(m, mu_2, sigma_2)
S <- 0.05
s <- 0.2
Incomplete_X <- MCAR_Multivariate(X,S,s)
Incomplete_Y <- MCAR_Multivariate(Y,S,s)
beta <- MedianHeuristic(Incomplete_X[!is.na(rowSums(Incomplete_X)),],Incomplete_Y[!is.na(rowSums(Incomplete_Y)),])
res_case_6 <- testing_with_missing_using_CLT(Incomplete_X, Incomplete_Y, beta)
res_case_6$stat
res_case_6$pval
############## Compute $p$-value of proposed method in the presence of missing data.
# NOTE(review): rm(list = ls()) wipes every object created above, including
# any function typed at the console.
rm(list = ls())
set.seed(0)
source('MMD using permutation with Missing data.R')
source('MMD using CLT with Missing Data.R')
## case 1: samples are all observed
n <- 100
############## Compute $p$-value of proposed method in the presence of missing data.
rm(list = ls())
set.seed(0)
source('MMD using permutation with Missing data.R')
# NOTE(review): the next two paths differ only in the case of "Data"/"data";
# on a case-sensitive file system at most one of them resolves -- confirm
# which spelling matches the file on disk.
source('MMD using CLT with Missing Data.R')
source('MMD using CLT with Missing data.R')
## case 1: samples are all observed
n <- 100
m <- 100
X <- rnorm(n, 0, 1)
Y <- rnorm(m, 0, 1)
beta <- MedianHeuristic(X,Y)
res_case_1 <- permutation_testing_with_missing_data(X,Y,beta,perm = 200)
res_case_1$stat
res_case_1$pval
## case 2: samples with missing data
# Given a vector X, return a copy in which a proportion s of the entries,
# chosen at random, is replaced by NA.
#
# Arguments:
#   X  vector of observations.
#   s  proportion of entries to set to NA.
# Returns:
#   X with NA at the sampled positions.
MCAR_univariate <- function(X,s){
  n <- length(X)
  # s * n can land just below the intended integer in floating point
  # (0.29 * 100 is 28.999...), and sample() truncates its size argument.
  # A small tolerance before flooring restores the intended count.
  n_missing <- floor(s * n + sqrt(.Machine$double.eps))
  # sample.int() draws the same indices as sample(1:n, .) for n >= 1 and also
  # behaves for n == 0, where 1:n would be c(1, 0).
  missing_location <- sample.int(n, n_missing)
  X[missing_location] <- NA
  X
}
# Case 2 run: univariate samples with 5% of entries missing; Y has mean 1.
n <- 100
m <- 100
X <- rnorm(n, 0, 1)
Y <- rnorm(m, 1, 1)
s <- 0.05
Incomplete_X <- MCAR_univariate(X, s)
Incomplete_Y <- MCAR_univariate(Y, s)
# beta is computed from the observed entries only.
beta <- MedianHeuristic(Incomplete_X[!is.na(Incomplete_X)], Incomplete_Y[!is.na(Incomplete_Y)])
res_case_2 <- permutation_testing_with_missing_data(Incomplete_X,Incomplete_Y,beta,perm = 100)
res_case_2$stat
res_case_2$pval
## case 3:  samples are all observed
# Multivariate (d = 10), both samples zero-mean with identity covariance.
d <- 10
n <- 100
m <- 100
mu_1 <- rep(0,d)
sigma_1 <- diag(d)
X <- MASS::mvrnorm(n, mu_1, sigma_1)
mu_2 <- rep(0,d)
sigma_2 <- diag(d)
# Y uses its own sample size m.
Y <- MASS::mvrnorm(m, mu_2, sigma_2)
beta <- MedianHeuristic(X,Y)
res_case_3 <- permutation_testing_with_missing_data(X, Y, beta, perm = 100)
res_case_3$stat
res_case_3$pval
## case 4:  not all samples are completely observed
# Given a matrix X (one sample per row), return a copy in which a proportion
# S of the rows is incompletely observed, each of those rows having a
# proportion s of its dimensions set to NA.
#
# The set of missing dimensions is drawn once and shared by every incomplete
# row. Rows are drawn before dimensions, so results for a given seed match
# sample(1:n, S*n) followed by sample(1:d, s*d).
#
# Arguments:
#   X  matrix with n rows and d columns.
#   S  proportion of rows that receive missing values.
#   s  proportion of columns set to NA in those rows.
# Returns:
#   X with NA written at the selected (row, column) positions.
MCAR_Multivariate <- function(X,S,s){
  n <- dim(X)[1]
  d <- dim(X)[2]
  # S * n can land just below the intended integer in floating point
  # (0.29 * 100 is 28.999...), and sample() truncates its size argument.
  # A small tolerance before flooring restores the intended count.
  tol <- sqrt(.Machine$double.eps)
  n_missing_rows <- floor(S * n + tol)
  n_missing_dims <- floor(s * d + tol)
  # sample.int() also behaves for n == 0 or d == 0, where 1:n would be c(1, 0).
  missing_location <- sample.int(n, n_missing_rows)
  missing_dims <- sample.int(d, n_missing_dims)
  X[missing_location, missing_dims] <- NA
  X
}
# Case 4 run with Y's mean shifted to 1 (d = 10, missing data, permutation
# test), case 5 run (d = 50, fully observed, CLT-based test), and a d = 50
# run with missing data and Y's mean shifted to 0.8.
d <- 10
n <- 100
m <- 100
mu_1 <- rep(0,d)
sigma_1 <- diag(d)
X <- MASS::mvrnorm(n, mu_1, sigma_1)
mu_2 <- rep(1,d)
sigma_2 <- diag(d)
# Y uses its own sample size m.
Y <- MASS::mvrnorm(m, mu_2, sigma_2)
S <- 0.05
s <- 0.2
Incomplete_X <- MCAR_Multivariate(X,S,s)
Incomplete_Y <- MCAR_Multivariate(Y,S,s)
# rowSums() is NA for any row containing an NA, so only fully observed rows
# are passed to MedianHeuristic.
beta <- MedianHeuristic(Incomplete_X[!is.na(rowSums(Incomplete_X)),],Incomplete_Y[!is.na(rowSums(Incomplete_Y)),])
res_case_4 <- permutation_testing_with_missing_data(Incomplete_X, Incomplete_Y, beta, perm = 100)
# Print the whole result object, then its stat and pval components.
res_case_4
res_case_4$stat
res_case_4$pval
## case 5:  samples are all observed
d <- 50
n <- 100
m <- 100
mu_1 <- rep(0,d)
sigma_1 <- diag(d)
X <- MASS::mvrnorm(n, mu_1, sigma_1)
mu_2 <- rep(0,d)
sigma_2 <- diag(d)
Y <- MASS::mvrnorm(m, mu_2, sigma_2)
beta <- MedianHeuristic(X,Y)
res_case_5 <- testing_with_missing_using_CLT(X, Y, beta)
res_case_5$stat
res_case_5$pval
# Unlabelled run stored as res_case_6: d = 50, Y mean shifted to 0.8 in
# every coordinate, missing data injected into both samples.
d <- 50
n <- 100
m <- 100
mu_1 <- rep(0,d)
sigma_1 <- diag(d)
X <- MASS::mvrnorm(n, mu_1, sigma_1)
mu_2 <- rep(0.8,d)
sigma_2 <- diag(d)
Y <- MASS::mvrnorm(m, mu_2, sigma_2)
S <- 0.05
s <- 0.2
Incomplete_X <- MCAR_Multivariate(X,S,s)
Incomplete_Y <- MCAR_Multivariate(Y,S,s)
beta <- MedianHeuristic(Incomplete_X[!is.na(rowSums(Incomplete_X)),],Incomplete_Y[!is.na(rowSums(Incomplete_Y)),])
res_case_6 <- testing_with_missing_using_CLT(Incomplete_X, Incomplete_Y, beta)
res_case_6$stat
res_case_6$pval
