# Sample cross-correlation function of x and y; ccf() also draws the
# correlogram because plotting is left at its default.
# NOTE(review): x and y are not created above this point -- they are expected
# to exist in the workspace already.
c <- ccf(x, y)
c[["acf"]]
# Simulate three coupled autoregressive series of length n driven by cubed
# standard-normal innovations:
#   x depends on its own previous value,
#   y depends on its own previous value and on x at lag 1,
#   w depends on its own previous value and on x at lag 2.
# The first two entries of every series stay at zero.
n <- 1000000

# Innovations are drawn in the order w, x, y.
epsw <- rnorm(n)^3
epsx <- rnorm(n)^3
epsy <- rnorm(n)^3

w <- numeric(n)
x <- numeric(n)
y <- numeric(n)

for (i in 3:n) {
  # Each update only reads values from earlier time steps, so the three
  # assignments within one step do not depend on each other.
  w[i] <- -0.6 * w[i - 1] + 0.8 * x[i - 2] + 0.5 * epsw[i]
  x[i] <- 0.3 * x[i - 1] + 0.5 * epsx[i]
  y[i] <- 0.8 * y[i - 1] + 0.8 * x[i - 1] + 0.5 * epsy[i]
}
# Exploratory look at the dependence between the simulated series.
# Each estimate is computed once: the series are long, so repeating the same
# ccf()/acf() call only repeats the work and the plot.
c <- ccf(x, y)
# Only peek at the start of y; printing the whole series floods the console.
head(y)
c$acf

# Joint auto- and cross-correlations of the bivariate series (x, y).
a <- acf(cbind(x, y))
a

# Scratch checks: %*% on two scalars returns a 1 x 1 matrix, * a plain number.
1 %*% 1
1.0 %*% 1.2
1.0 * 1.2
ceiling(3.2)
# Set up the cross-correlation test between x and eps (here eps is simply y).
eps <- y
alpha <- 0.05
max_lag <- 10

# Test statistic: largest absolute sample cross-correlation returned by ccf().
# NOTE: `T` shadows R's built-in shorthand for TRUE; the name is kept because
# later statements read it.
corr1 <- ccf(x, eps, lag.max = max_lag, type = "correlation", plot = FALSE)
T <- max(abs(corr1$acf))
T

# Zero-initialised placeholder for the (2 * max_lag) x (2 * max_lag) covariance.
sigma <- matrix(0, nrow = 2 * max_lag, ncol = 2 * max_lag)
sigma
# Maximal cross-correlation test between x and eps with max_lag = 5.
# Every quantity is assigned before it is read, so the block runs top to bottom
# in a fresh, non-interactive session. `alpha`, `x` and `eps` must already
# exist when this runs.
max_lag <- 5

# Test statistic: largest absolute sample cross-correlation returned by ccf().
# NOTE: `T` shadows R's built-in shorthand for TRUE; the name is kept because
# later statements read it.
# NOTE(review): ccf() returns 2 * max_lag + 1 lags, while the simulated null
# vector below has 2 * max_lag components -- confirm this is intended.
corr1 <- ccf(x, eps, lag.max = max_lag, type = "correlation", plot = FALSE)
T <- max(abs(corr1$acf))
T

# Covariance used for the simulated null distribution: entry (i, j) is the
# sample autocorrelation of x at lag |i - j|, divided by the series length.
acr <- acf(x, lag.max = 2 * max_lag, type = "correlation", plot = FALSE)
sigma <- toeplitz(acr$acf[seq_len(2 * max_lag)])
sigma <- sigma / length(x)
R <- chol(sigma)
R

# Draw correlated Gaussian vectors (rows of z) with covariance sigma and
# record the largest absolute component of each draw.
num_simulations <- 20000
z <- matrix(rnorm(2 * max_lag * num_simulations), num_simulations, 2 * max_lag)
z <- z %*% R
maxz <- apply(abs(z), 1, max)
length(maxz)

# Critical value: empirical (1 - alpha) quantile of the simulated maxima.
maxzorder <- sort(maxz)
quan <- maxzorder[ceiling(num_simulations - alpha * num_simulations)]
quan

# p-value: share of simulated maxima exceeding the observed statistic.
pval <- sum(maxzorder > T) / num_simulations
pval

resu <- list(statistic = T, crit.value = quan, p.value = pval)
resu
# Maximal cross-correlation test between x and eps.
# NOTE: `T` shadows R's built-in shorthand for TRUE; the name is kept because
# later statements read it.
corr1 <- ccf(x, eps, lag.max = max_lag, type = "correlation", plot = FALSE)
T <- max(abs(corr1$acf))

# Estimate the covariance of the sample cross-correlations.
# Brockwell and Davis, Theorem 11.2.3 ("Bartlett's formula"):
# under H_0: rho_{12} == 0 the only non-zero summands are those with j = k - h.
# Entry (i, j) is therefore filled with the sample autocorrelation of x at lag
# |i - j|, and the matrix is divided by the series length afterwards.
acr <- acf(x, lag.max = 2 * max_lag, type = "correlation", plot = FALSE)
rho <- as.vector(acr$acf)
sigma <- matrix(0, 2 * max_lag, 2 * max_lag)
sigma[] <- rho[1 + abs(row(sigma) - col(sigma))]
sigma <- sigma / length(x)
R <- chol(sigma)

# Simulate the null distribution of the maximum absolute component.
num_simulations <- 20000
z <- matrix(
  rnorm(2 * max_lag * num_simulations),
  num_simulations,
  2 * max_lag
) %*% R
maxz <- apply(abs(z), 1, max)
maxzorder <- sort(maxz)

# Critical value at level alpha and simulated p-value of the observed statistic.
quan <- maxzorder[ceiling(num_simulations - alpha * num_simulations)]
pval <- sum(maxzorder > T) / num_simulations
resu <- list(statistic = T, crit.value = quan, p.value = pval)
resu
# ---- Simulate the three coupled series (n = 10000) ----
# x depends on its own previous value, y additionally on x at lag 1 and w on
# x at lag 2; innovations are cubed standard normals drawn in the order
# w, x, y. The first two entries of every series stay at zero.
n <- 10000
epsw <- rnorm(n)^3
epsx <- rnorm(n)^3
epsy <- rnorm(n)^3
w <- numeric(n)
x <- numeric(n)
y <- numeric(n)
for (i in 3:n) {
  # Each update only reads values from earlier time steps.
  w[i] <- -0.6 * w[i - 1] + 0.8 * x[i - 2] + 0.5 * epsw[i]
  x[i] <- 0.3 * x[i - 1] + 0.5 * epsx[i]
  y[i] <- 0.8 * y[i - 1] + 0.8 * x[i - 1] + 0.5 * epsy[i]
}
x

# ---- Maximal cross-correlation test between x and eps (= y) ----
eps <- y
alpha <- 0.05
max_lag <- 5

# NOTE: `T` shadows R's built-in shorthand for TRUE; the name is kept because
# later statements read it.
corr1 <- ccf(x, eps, lag.max = max_lag, type = "correlation", plot = FALSE)
T <- max(abs(corr1$acf))

# Estimate the covariance of the sample cross-correlations.
# Brockwell and Davis, Theorem 11.2.3 ("Bartlett's formula"):
# under H_0: rho_{12} == 0 the only non-zero summands are those with j = k - h.
# The matrix is Toeplitz: entry (i, j) is the sample autocorrelation of x at
# lag |i - j|; it is divided by the series length afterwards.
acr <- acf(x, lag.max = 2 * max_lag, type = "correlation", plot = FALSE)
sigma <- toeplitz(as.vector(acr$acf)[seq_len(2 * max_lag)])
sigma <- sigma / length(x)
R <- chol(sigma)

# Simulate the null distribution of the maximum absolute component.
num_simulations <- 20000
z <- matrix(
  rnorm(2 * max_lag * num_simulations),
  num_simulations,
  2 * max_lag
) %*% R
maxz <- apply(abs(z), 1, max)
maxzorder <- sort(maxz)

# Critical value at level alpha and simulated p-value of the observed statistic.
quan <- maxzorder[ceiling(num_simulations - alpha * num_simulations)]
pval <- sum(maxzorder > T) / num_simulations
resu <- list(statistic = T, crit.value = quan, p.value = pval)
resu
# Scratch checks of max(): all arguments are pooled before the maximum is
# taken, so an extra scalar only matters when it exceeds the vector's maximum.
max(c(1, 3, 4))
max(c(1, 3, 4))
max(c(1, 3, 4))
max(c(1, 3, 4), 1)
max(c(1, 3, 4), 2)
max(c(1, 3, 4), 10)
# Load the measurement records and collect the patient ids and measurement
# types that occur in them.
# NOTE(review): the working directory is a machine-specific absolute path.
library("rjson")
setwd('/home/kassaad/Documents/Codes/Causality-time-series/data')
result <- fromJSON(file = "results-LAMROS(2).json")

# One entry per record, in record order. The values are kept in a list exactly
# as parsed, and lapply() avoids re-copying a growing list on every record.
indi <- unname(lapply(result, function(record) record[["source"]]))
ty <- unname(lapply(result, function(record) record[["stype"]]))

# Distinct patient ids and measurement types, in order of first appearance.
unique_ind <- unique(indi)
unique_ty <- unique(ty)

# Positions of the records belonging to one particular patient.
which(indi == "1547746")
# For every patient: build a wide date x measurement-type table, clean it, run
# timino_dag() on it and add the resulting matrix to the running total `dags`.
# NOTE(review): `dags` and `k` are updated here but not created in this loop;
# they, `result`, `unique_ind` and the TiMINo functions must exist beforehand.

# Patients that are skipped when calling timino_dag().
excluded_patients <- c(
  "1568762", "330706", "1712131", "281749", "131005", "534153", "1535231"
)

for (patient in unique_ind) {
  # Gather this patient's records as parallel character vectors.
  date <- character(1)
  stype <- character(1)
  title <- character(1)
  val <- character(1)
  i <- 1
  for (res in result) {
    if (res[["source"]] == patient) {
      date[i] <- res$date
      stype[i] <- res$stype
      title[i] <- res$title
      val[i] <- res$val
      i <- i + 1
    }
  }
  df <- data.frame(
    date = date,
    stype = stype,
    title = title,
    val = val,
    stringsAsFactors = FALSE
  )

  # Pivot to wide form: one row per date, one column per measurement type.
  # If a type occurs several times on one date, its first value is used.
  new_df <- data.frame()
  for (d in unique(df$date)) {
    temp <- df[which(df$date == d), ]
    for (s in unique(temp$stype)) {
      new_df[d, s] <- temp$val[which(temp$stype == s)][1]
    }
  }

  # Sort rows by date. drop = FALSE keeps a data frame even when the table has
  # a single column (plain row subsetting would collapse it to a vector).
  row_order <- order(as.Date(row.names(new_df), format = "%Y-%m-%d"))
  new_df <- new_df[row_order, , drop = FALSE]

  # Remove variables missing on more than half of the dates, then remove the
  # dates that still have a missing value.
  nr <- nrow(new_df)
  delete_var <- colnames(new_df)[colSums(is.na(new_df)) > nr / 2]
  new_df[delete_var] <- NULL
  new_df <- new_df[complete.cases(new_df), , drop = FALSE]
  rownames(new_df) <- NULL

  # Values were collected as text; convert every column to numeric.
  for (name in colnames(new_df)) {
    new_df[[name]] <- as.numeric(new_df[[name]])
  }

  # Derived variable, only available when both inputs survived the cleaning.
  if ("BLA" %in% colnames(new_df) && "WBC" %in% colnames(new_df)) {
    new_df$newvar <- new_df$WBC - new_df$BLA
  }
  nr <- nrow(new_df)

  # Number of distinct values per variable; the fit is skipped (see the
  # message below) unless every variable has at least 4.
  u <- vapply(new_df, function(column) length(unique(column)), integer(1))

  if (min(u) > 3) {
    if (!(patient %in% excluded_patients)) {
      dag <- timino_dag(
        new_df,
        alpha = 0.05,
        max_lag = 4,
        model = traints_gam,
        indtest = indtestts_hsic,
        output = FALSE
      )
      dag <- data.frame(dag)
      colnames(dag) <- colnames(new_df)
      rownames(dag) <- colnames(new_df)

      # Add the non-missing entries to the accumulator, matched by name.
      for (row_name in rownames(dag)) {
        for (col_name in colnames(dag)) {
          if (!is.na(dag[row_name, col_name])) {
            dags[row_name, col_name] <-
              dags[row_name, col_name] + dag[row_name, col_name]
          }
        }
      }
      print(dag)
      print(nr)
      k <- k + 1
    }
  } else {
    print("A smoothing variable encountered with 3 or less unique values: at least 4 needed")
  }
  # Number of patients processed by timino_dag() so far.
  print(k)
}
# Copyright (c) 2013-2013 Jonas Peters [peters@stat.math.ethz.ch]
# All rights reserved.  See the file COPYING for license terms.
# Load the packages and project scripts needed for the causal-discovery runs.
# NOTE(review): the working directory is a machine-specific absolute path; the
# relative source() paths below are resolved against it.
setwd('/home/kassaad/Documents/Codes/R - codes/onlineCodeTimino/codeTimino')
library(gam)
library(kernlab)
# library(gptk)
# Project scripts. Presumably these define timino_dag, traints_gam and
# indtestts_hsic, which are called elsewhere in this file -- TODO confirm.
source("granger_causality.R")
source("timino_causality.R")
source("./util/hammingDistance.R")
source("./util/indtestAll.R")
source("./util/indtestHsic.R")
source("./util/indtestPcor.R")
source("./util/TSindtest.R")
source("./util/fitting_ts.R")
# For every patient: build a wide date x measurement-type table, clean it, run
# timino_dag() on it and add the resulting matrix to the running total `dags`.
# NOTE(review): `dags` and `k` are updated here but not created in this loop;
# they, `result` and `unique_ind` must exist beforehand.

# Patients that are skipped when calling timino_dag().
excluded_patients <- c(
  "1568762", "330706", "1712131", "281749", "131005", "534153", "1535231"
)

for (patient in unique_ind) {
  # Gather this patient's records as parallel character vectors.
  date <- character(1)
  stype <- character(1)
  title <- character(1)
  val <- character(1)
  i <- 1
  for (res in result) {
    if (res[["source"]] == patient) {
      date[i] <- res$date
      stype[i] <- res$stype
      title[i] <- res$title
      val[i] <- res$val
      i <- i + 1
    }
  }
  df <- data.frame(
    date = date,
    stype = stype,
    title = title,
    val = val,
    stringsAsFactors = FALSE
  )

  # Pivot to wide form: one row per date, one column per measurement type.
  # If a type occurs several times on one date, its first value is used.
  new_df <- data.frame()
  for (d in unique(df$date)) {
    temp <- df[which(df$date == d), ]
    for (s in unique(temp$stype)) {
      new_df[d, s] <- temp$val[which(temp$stype == s)][1]
    }
  }

  # Sort rows by date. drop = FALSE keeps a data frame even when the table has
  # a single column (plain row subsetting would collapse it to a vector).
  row_order <- order(as.Date(row.names(new_df), format = "%Y-%m-%d"))
  new_df <- new_df[row_order, , drop = FALSE]

  # Remove variables missing on more than half of the dates, then remove the
  # dates that still have a missing value.
  nr <- nrow(new_df)
  delete_var <- colnames(new_df)[colSums(is.na(new_df)) > nr / 2]
  new_df[delete_var] <- NULL
  new_df <- new_df[complete.cases(new_df), , drop = FALSE]
  rownames(new_df) <- NULL

  # Values were collected as text; convert every column to numeric.
  for (name in colnames(new_df)) {
    new_df[[name]] <- as.numeric(new_df[[name]])
  }

  # Derived variable, only available when both inputs survived the cleaning.
  if ("BLA" %in% colnames(new_df) && "WBC" %in% colnames(new_df)) {
    new_df$newvar <- new_df$WBC - new_df$BLA
  }
  nr <- nrow(new_df)

  # Number of distinct values per variable; the fit is skipped (see the
  # message below) unless every variable has at least 4.
  u <- vapply(new_df, function(column) length(unique(column)), integer(1))

  if (min(u) > 3) {
    if (!(patient %in% excluded_patients)) {
      dag <- timino_dag(
        new_df,
        alpha = 0.05,
        max_lag = 4,
        model = traints_gam,
        indtest = indtestts_hsic,
        output = FALSE
      )
      dag <- data.frame(dag)
      colnames(dag) <- colnames(new_df)
      rownames(dag) <- colnames(new_df)

      # Add the non-missing entries to the accumulator, matched by name.
      for (row_name in rownames(dag)) {
        for (col_name in colnames(dag)) {
          if (!is.na(dag[row_name, col_name])) {
            dags[row_name, col_name] <-
              dags[row_name, col_name] + dag[row_name, col_name]
          }
        }
      }
      print(dag)
      print(nr)
      k <- k + 1
    }
  } else {
    print("A smoothing variable encountered with 3 or less unique values: at least 4 needed")
  }
  # Number of patients processed by timino_dag() so far.
  print(k)
}
# For every patient: build a wide date x measurement-type table, clean it, run
# timino_dag() on it and add the resulting matrix to the running total `dags`.
# NOTE(review): `dags` and `k` are updated here but not created in this loop;
# they, `result` and `unique_ind` must exist beforehand.

# Patients that are skipped when calling timino_dag().
excluded_patients <- c(
  "1568762", "330706", "1712131", "281749", "131005", "534153", "1535231"
)

for (patient in unique_ind) {
  # Gather this patient's records as parallel character vectors.
  date <- character(1)
  stype <- character(1)
  title <- character(1)
  val <- character(1)
  i <- 1
  for (res in result) {
    if (res[["source"]] == patient) {
      date[i] <- res$date
      stype[i] <- res$stype
      title[i] <- res$title
      val[i] <- res$val
      i <- i + 1
    }
  }
  df <- data.frame(
    date = date,
    stype = stype,
    title = title,
    val = val,
    stringsAsFactors = FALSE
  )

  # Pivot to wide form: one row per date, one column per measurement type.
  # If a type occurs several times on one date, its first value is used.
  new_df <- data.frame()
  for (d in unique(df$date)) {
    temp <- df[which(df$date == d), ]
    for (s in unique(temp$stype)) {
      new_df[d, s] <- temp$val[which(temp$stype == s)][1]
    }
  }

  # Sort rows by date. drop = FALSE keeps a data frame even when the table has
  # a single column (plain row subsetting would collapse it to a vector).
  row_order <- order(as.Date(row.names(new_df), format = "%Y-%m-%d"))
  new_df <- new_df[row_order, , drop = FALSE]

  # Remove variables missing on more than half of the dates, then remove the
  # dates that still have a missing value.
  nr <- nrow(new_df)
  delete_var <- colnames(new_df)[colSums(is.na(new_df)) > nr / 2]
  new_df[delete_var] <- NULL
  new_df <- new_df[complete.cases(new_df), , drop = FALSE]
  rownames(new_df) <- NULL

  # Values were collected as text; convert every column to numeric.
  for (name in colnames(new_df)) {
    new_df[[name]] <- as.numeric(new_df[[name]])
  }

  # Derived variable, only available when both inputs survived the cleaning.
  if ("BLA" %in% colnames(new_df) && "WBC" %in% colnames(new_df)) {
    new_df$newvar <- new_df$WBC - new_df$BLA
  }
  nr <- nrow(new_df)

  # Number of distinct values per variable; the fit is skipped (see the
  # message below) unless every variable has at least 4.
  u <- vapply(new_df, function(column) length(unique(column)), integer(1))

  if (min(u) > 3) {
    if (!(patient %in% excluded_patients)) {
      dag <- timino_dag(
        new_df,
        alpha = 0.05,
        max_lag = 4,
        model = traints_gam,
        indtest = indtestts_hsic,
        output = FALSE
      )
      dag <- data.frame(dag)
      colnames(dag) <- colnames(new_df)
      rownames(dag) <- colnames(new_df)

      # Add the non-missing entries to the accumulator, matched by name.
      for (row_name in rownames(dag)) {
        for (col_name in colnames(dag)) {
          if (!is.na(dag[row_name, col_name])) {
            dags[row_name, col_name] <-
              dags[row_name, col_name] + dag[row_name, col_name]
          }
        }
      }
      print(dag)
      print(nr)
      k <- k + 1
    }
  } else {
    print("A smoothing variable encountered with 3 or less unique values: at least 4 needed")
  }
  # Number of patients processed by timino_dag() so far.
  print(k)
}
# Accumulate the per-patient timino_dag() results over all patients.
# `k` counts the patients that were actually fitted; `dags` sums their result
# matrices, with one row and column per measurement type plus the derived
# variable "newvar". The accumulator is sized from the observed types instead
# of a fixed 16 so it stays valid when the set of types changes.
# NOTE(review): `result`, `unique_ind`, `unique_ty` and the TiMINo functions
# must exist before this runs.
k <- 0
dag_vars <- as.character(c(unique_ty, "newvar"))
dags <- data.frame(matrix(0, nrow = length(dag_vars), ncol = length(dag_vars)))
colnames(dags) <- dag_vars
rownames(dags) <- dag_vars

# Patients that are skipped when calling timino_dag().
excluded_patients <- c(
  "1568762", "330706", "1712131", "281749", "131005", "534153", "1535231"
)

for (patient in unique_ind) {
  # Gather this patient's records as parallel character vectors.
  date <- character(1)
  stype <- character(1)
  title <- character(1)
  val <- character(1)
  i <- 1
  for (res in result) {
    if (res[["source"]] == patient) {
      date[i] <- res$date
      stype[i] <- res$stype
      title[i] <- res$title
      val[i] <- res$val
      i <- i + 1
    }
  }
  df <- data.frame(
    date = date,
    stype = stype,
    title = title,
    val = val,
    stringsAsFactors = FALSE
  )

  # Pivot to wide form: one row per date, one column per measurement type.
  # If a type occurs several times on one date, its first value is used.
  new_df <- data.frame()
  for (d in unique(df$date)) {
    temp <- df[which(df$date == d), ]
    for (s in unique(temp$stype)) {
      new_df[d, s] <- temp$val[which(temp$stype == s)][1]
    }
  }

  # Sort rows by date. drop = FALSE keeps a data frame even when the table has
  # a single column (plain row subsetting would collapse it to a vector).
  row_order <- order(as.Date(row.names(new_df), format = "%Y-%m-%d"))
  new_df <- new_df[row_order, , drop = FALSE]

  # Remove variables missing on more than half of the dates, then remove the
  # dates that still have a missing value.
  nr <- nrow(new_df)
  delete_var <- colnames(new_df)[colSums(is.na(new_df)) > nr / 2]
  new_df[delete_var] <- NULL
  new_df <- new_df[complete.cases(new_df), , drop = FALSE]
  rownames(new_df) <- NULL

  # Values were collected as text; convert every column to numeric.
  for (name in colnames(new_df)) {
    new_df[[name]] <- as.numeric(new_df[[name]])
  }

  # Derived variable, only available when both inputs survived the cleaning.
  if ("BLA" %in% colnames(new_df) && "WBC" %in% colnames(new_df)) {
    new_df$newvar <- new_df$WBC - new_df$BLA
  }
  nr <- nrow(new_df)

  # Number of distinct values per variable; the fit is skipped (see the
  # message below) unless every variable has at least 4.
  u <- vapply(new_df, function(column) length(unique(column)), integer(1))

  if (min(u) > 3) {
    if (!(patient %in% excluded_patients)) {
      dag <- timino_dag(
        new_df,
        alpha = 0.05,
        max_lag = 4,
        model = traints_gam,
        indtest = indtestts_hsic,
        output = FALSE
      )
      dag <- data.frame(dag)
      colnames(dag) <- colnames(new_df)
      rownames(dag) <- colnames(new_df)

      # Add the non-missing entries to the accumulator, matched by name.
      for (row_name in rownames(dag)) {
        for (col_name in colnames(dag)) {
          if (!is.na(dag[row_name, col_name])) {
            dags[row_name, col_name] <-
              dags[row_name, col_name] + dag[row_name, col_name]
          }
        }
      }
      print(dag)
      print(nr)
      k <- k + 1
    }
  } else {
    print("A smoothing variable encountered with 3 or less unique values: at least 4 needed")
  }
  # Number of patients processed by timino_dag() so far.
  print(k)
}

# Summed result matrices over all fitted patients.
dags
