## here is some code to parse the datasets from the repository of Tubingen

## reading the files from the Tuebingen cause-effect pairs repository (webdav.tuebingen.mpg.de)

# Base URL of the pair files; a zero-padded pair number plus ".txt" is
# appended to it whenever a file is read.
text <- "https://webdav.tuebingen.mpg.de/cause-effect/pair"
# Accepted datasets and the indices of rejected ones, both starting as empty lists.
dat <- list()
blacklist <- list()

# sorting out the datasets with incorrect format

# Pairs 1-9: read each file, blacklist it when it has more than two columns,
# otherwise keep it in `dat` and export it as "causal_tubingen00<i>.csv".
# The export happens only for accepted datasets, matching the loops for
# pairs 10-99 and 100-108.
for (i in 1:9) {
  # File names are the pair number zero-padded to four digits, e.g. "0001.txt".
  filename <- sprintf("%04d.txt", i)
  sample_data <- read.csv(paste0(text, filename),
                          header = FALSE, dec = ".", sep = " ")

  print(paste("Size of dataset", i))
  cat(nrow(sample_data), ncol(sample_data))

  if (ncol(sample_data) > 2) {
    blacklist <- append(blacklist, i, after = 0)
  } else {
    # Prepended (after = 0): `dat` ends up ordered from the most recently
    # accepted dataset back to the first one.
    dat <- append(dat, list(sample_data), after = 0)
    name <- sprintf("causal_tubingen%03d.csv", i)
    write.table(sample_data, file = name, sep = ",",
                row.names = FALSE, col.names = FALSE)
  }

  writeLines("\n")
}


# Pairs 10-99: same procedure as for the single-digit pairs, with a two-zero
# prefix in the remote file name and a one-zero prefix in the exported name.
line <- "00"
for (i in 10:99) {
  filename <- paste0(line, i, ".txt")
  sample_data <- read.csv(paste0(text, filename),
                          header = FALSE, dec = ".", sep = " ")

  print(sprintf("Size of dataset %d", i))

  if (ncol(sample_data) <= 2) {
    # Accepted: prepend to `dat` and export as a headerless comma-separated file.
    dat <- c(list(sample_data), dat)
    name <- paste0("causal_tubingen0", i, ".csv")
    write.table(sample_data, file = name, sep = ",",
                row.names = FALSE, col.names = FALSE)
  } else {
    # Rejected: remember the index only.
    blacklist <- c(i, blacklist)
  }

  # Row and column counts on one line, followed by two line breaks.
  cat(dim(sample_data))
  cat("\n\n")
}

# Pairs 100-108: read each file, blacklist it when it has more than two
# columns, otherwise keep it in `dat` and export it as "causal_tubingen<i>.csv".
for (i in 100:108) {
  # File names are the pair number zero-padded to four digits, e.g. "0100.txt".
  filename <- sprintf("%04d.txt", i)
  # header = FALSE is required: read.csv() defaults to header = TRUE, which
  # would turn the first data row into column names and drop it from the data.
  sample_data <- read.csv(paste0(text, filename),
                          header = FALSE, dec = ".", sep = " ")

  print(paste("Size of dataset", i))
  cat(nrow(sample_data), ncol(sample_data))
  writeLines("\n")

  if (ncol(sample_data) > 2) {
    blacklist <- append(blacklist, i, after = 0)
  } else {
    # Prepended (after = 0), so `dat` stays ordered newest-first.
    dat <- append(dat, list(sample_data), after = 0)
    name <- paste0("causal_tubingen", i, ".csv")
    write.table(sample_data, file = name, sep = ",",
                row.names = FALSE, col.names = FALSE)
  }
}

# Indices 1..108 that were not blacklisted, in ascending order.
whitelist <- setdiff(seq_len(108), blacklist)

# writing all of the available datasets to one .csv file

# Append every accepted dataset to one file, transposed so that each column of
# a dataset becomes one line (row names are written, column names are not).
# seq_along() skips the loop entirely when `dat` is empty, whereas
# 1:length(dat) would iterate over c(1, 0). A plain loop also avoids
# auto-printing a list of NULLs at top level.
for (k in seq_along(dat)) {
  write.table(t(as.data.frame(dat[k])),
              'causal_tubingenALL.csv', append = TRUE, sep = ',',
              quote = FALSE, col.names = FALSE)
}


# writing the datasets to separate files and collecting their variable names for ground truth
# Write each accepted dataset to its own file "causal_tubingen<i>.csv", where
# <i> is the pair index without zero padding.
# `dat` is filled by prepending, so it is ordered opposite to `whitelist`
# (which is ascending). Reversing it once makes position k in both refer to the
# same pair; indexing `dat[1]` would write the same dataset to every file.
stopifnot(length(dat) == length(whitelist))
dat_by_pair <- rev(dat)
for (k in seq_along(whitelist)) {
  name <- paste0("causal_tubingen", whitelist[k], ".csv")
  write.table(dat_by_pair[[k]], file = name, sep = ",",
              row.names = FALSE, col.names = FALSE)
}

# Append the accepted pair indices, one per line, to the names file.
# A single vectorised call emits the same lines as writing them one by one;
# the guard keeps the file untouched when there is nothing to write.
if (length(whitelist) > 0) {
  write.table(whitelist, "ct_testnames.txt", append = TRUE, sep = "",
              row.names = FALSE, col.names = FALSE)
}


########### some example of json file

# Read the first pair file into `df`, then fetch a zip archive from Zenodo
# and store it locally as "sample.zip".
text <- "https://webdav.tuebingen.mpg.de/cause-effect/pair"
line <- "000"
filename <- paste0(line, 1, ".txt")

df <- read.csv(paste0(text, filename), header = FALSE, dec = ".", sep = " ")

download.file(
  "https://zenodo.org/record/5909090/files/gold_standard_datasets.zip?download=1",
  destfile = "sample.zip"
)
