# Load the full results table (semicolon-separated, first row is the header).
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
m <- read.table("../tables//statistical-data.csv", sep = ";",
                as.is = TRUE, header = TRUE)
table(m$graph)

# Keep only the rows whose graph column is "simulated".
sim <- subset(m, graph == "simulated")

table(sim$method)

# I have to compare BFKL against Lorentz (discrete == 0)

# Key made of ssize, sid and stemp; the merges further down pair rows on it.
sim$id_merge <- paste(sim$ssize, sim$sid, sim$stemp)


# this won't do much
#
# Overlay the running-time densities of both methods for one graph size.
#   n    - value of ssize to select
#   text - plot title
# Reads the global data frame `sim`; draws on the current device and returns
# NULL invisibly (the value of lines()).
#
# The size filter indexes sim$ssize explicitly. Inside subset() a bare `n`
# is looked up among the data-frame columns first, so a column called `n`
# would be compared with ssize instead of the function argument.
# NOTE: the script redefines dens_plo right after this definition, before
# any call is made.
dens_plo <- function (n, text) {
  continuous <- sim[which(sim$discrete == 0 & sim$ssize == n), ]
  lor_time <- continuous$time[
    which(continuous$method == "lorentz" & continuous$dim == 2)
  ]
  bfkl_time <- continuous$time[which(continuous$method == "bfkl")]
  plot(density(bfkl_time), main = text, col = "blue", xlab = NA)
  lines(density(lor_time))
  #abline(v = 1, col="red", lwd=3, lty=2)
}

# Density of the paired running-time difference (Lorentz minus BFKL) for one
# graph size.
#   n    - value of ssizelor to select after pairing
#   text - plot title
# Reads the global data frame `sim`, pairs the two methods on id_merge and
# draws on the current device.
dens_plo <- function (n, text) {
  continuous <- subset(sim, discrete == 0)
  paired <- merge(
    subset(continuous, method == "lorentz" & dim == 2),
    subset(continuous, method == "bfkl"),
    by = "id_merge",
    suffixes = c("lor", "bfkl")
  )
  paired$diff <- paired$timelor - paired$timebfkl
  at_size <- subset(paired, ssizelor == n)
  plot(density(at_size$diff), main = text, col = "blue", xlab = NA)
  #abline(v = 1, col="red", lwd=3, lty=2)
}

# One density plot per graph size, titled "n=<size>".
for (graph_size in c(100, 200, 500, 1000, 2000)) {
  dens_plo(graph_size, paste0("n=", graph_size))
}

# Rows of the two methods being compared (non-discrete runs only; Lorentz is
# additionally restricted to dim == 2).
lor <- subset(sim, discrete == 0 & method == "lorentz" & dim == 2)
bfkl <- subset(sim, discrete == 0 & method == "bfkl")

# Attach the separately recorded BFKL times as precise_time, matched on the
# same ssize/sid/stemp key as sim$id_merge.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
n <- read.table("../tables//precise-times.csv", sep = ";",
                as.is = TRUE, header = TRUE)
n <- data.frame(
  id_merge = paste(n$ssize, n$sid, n$stemp),
  precise_time = n$bfkltime
)
bfkl <- merge(bfkl, n, by = "id_merge")

# One row per key present in both tables; columns shared by both sides get
# the suffix "lor" or "bfkl" (e.g. timelor / timebfkl).
x <- merge(lor, bfkl, by = "id_merge", suffixes = c("lor", "bfkl"))

# comparison of difference in time in division by graph size

# Time: BFKL time as a percentage of the Lorentz time, and the raw difference.
x$ratio <- with(x, 100 * (timebfkl / timelor))
x$diff <- with(x, timelor - timebfkl)
summary(x$ratio)

# MAP: plain quotient (no factor of 100) and Lorentz-minus-BFKL difference.
x$ratio_map <- with(x, mapbfkl / maplor)
x$diff_map <- with(x, maplor - mapbfkl)
summary(x$ratio_map)

# MR: quotient, and difference of logs (log BFKL minus log Lorentz).
x$ratio_mr <- with(x, mrbfkl / mrlor)
x$diff_mr <- with(x, log(mrbfkl) - log(mrlor))

# Greedy success: quotient and Lorentz-minus-BFKL difference.
x$ratio_greedy <- with(x, successbfkl / successlor)
x$diff_greedy <- with(x, successlor - successbfkl)

# Stretch: quotient, and difference of logs (log BFKL minus log Lorentz).
x$ratio_stretch <- with(x, stretchbfkl / stretchlor)
x$diff_stretch <- with(x, log(stretchbfkl) - log(stretchlor))

# Density of the time ratio, one curve per graph size, drawn once for
# stempbfkl == 1 and once for stempbfkl == 7. The two figures differ only in
# the y-axis limit and the vertical legend position.
ratio_sizes <- c(2000, 1000, 500, 200, 100)
ratio_colours <- c("blue", "red", "purple", "yellow", "green")
ratio_panels <- list(
  list(temp = 1, ylim = c(0, 8), legend_y = 6),
  list(temp = 7, ylim = c(0, 3.5), legend_y = 2.5)
)
for (panel in ratio_panels) {
  at_temp <- subset(x, stempbfkl == panel$temp)
  for (k in seq_along(ratio_sizes)) {
    curve_k <- density(subset(at_temp, ssizelor == ratio_sizes[k])$ratio)
    if (k == 1) {
      # the first size opens the figure, the remaining ones are overlaid
      plot(curve_k, main = "comparison of time [s]", col = ratio_colours[k],
           xlab = NA, ylim = panel$ylim, xlim = c(0, 2))
    } else {
      lines(curve_k, col = ratio_colours[k])
    }
  }
  legend(1.25, panel$legend_y, legend = paste0("n=", ratio_sizes),
         col = ratio_colours, lty = 1)
}

# comparison of map in division by graph size
#
# ratio_map is the plain quotient mapbfkl / maplor (no factor of 100), so a
# value of 1 means both methods reach the same MAP. The reference line, axis
# limits and legend positions are therefore given on the ratio scale; the
# percent-scale values used before (xlim 50..130, line at 100, legend at
# x = 105 / 140) lie outside the data range and left the figure empty.
map_ratio_sizes <- c(2000, 1000, 500, 200, 100)
map_ratio_colours <- c("blue", "red", "purple", "yellow", "green")
map_ratio_panels <- list(
  # stempbfkl == 1: automatic axis limits, legend in the corner
  list(temp = 1),
  # stempbfkl == 7: former percent-scale limits converted to the ratio scale
  # (x divided by 100, density multiplied by 100)
  list(temp = 7, xlim = c(0.5, 1.3), ylim = c(0, 4), legend_xy = c(1.05, 4))
)
for (panel in map_ratio_panels) {
  at_temp <- subset(x, stempbfkl == panel$temp)
  for (k in seq_along(map_ratio_sizes)) {
    curve_k <- density(
      subset(at_temp, ssizelor == map_ratio_sizes[k])$ratio_map
    )
    if (k == 1) {
      # xlim / ylim are NULL for the first panel, i.e. chosen automatically
      plot(curve_k, main = "comparison of map", col = map_ratio_colours[k],
           xlab = NA, xlim = panel$xlim, ylim = panel$ylim)
    } else {
      lines(curve_k, col = map_ratio_colours[k])
    }
  }
  abline(v = 1)
  if (is.null(panel$legend_xy)) {
    legend("topright", legend = paste0("n=", map_ratio_sizes),
           col = map_ratio_colours, lty = 1)
  } else {
    legend(panel$legend_xy[1], panel$legend_xy[2],
           legend = paste0("n=", map_ratio_sizes),
           col = map_ratio_colours, lty = 1)
  }
}

# diff in map
#
# Three side-by-side panels (stempbfkl 1, 4, 7) written to one PDF. Each panel
# overlays the density of diff_map for n = 2000, 1000 and 500; the n = 200 and
# n = 100 curves are not drawn.
pdf("~/densities_MAP.pdf", 30, 7)
par(mfrow = c(1, 3))
map_panels <- list(
  list(temp = 1, title = "comparison of MAP, T=0.1", xlim = c(-0.15, 0.15)),
  list(temp = 4, title = "comparison of MAP, T=0.4", xlim = c(-0.15, 0.20)),
  list(temp = 7, title = "comparison of MAP, T=0.7", xlim = c(-0.1, 0.2),
       ylim = c(0, 23))
)
for (panel in map_panels) {
  at_temp <- subset(x, stempbfkl == panel$temp)
  # a missing ylim entry is NULL, which leaves the axis range automatic
  plot(density(subset(at_temp, ssizelor == 2000)$diff_map),
       main = panel$title, col = "blue", xlab = NA,
       xlim = panel$xlim, ylim = panel$ylim)
  lines(density(subset(at_temp, ssizelor == 1000)$diff_map), col = "black")
  lines(density(subset(at_temp, ssizelor == 500)$diff_map), col = "red")
  abline(v = 0)
  legend("topright", cex = 2.5, legend = c("n=2000", "n=1000", "n=500"),
         col = c("blue", "black", "red"), lty = 1)
}

dev.off()


# individual plots
#
# For stempbfkl == 1 and each graph size, overlay the density of maplor (blue)
# with the density of mapbfkl (black). No `main` is passed, so the titles are
# whatever plot() generates by default for a density object.

# n = 2000
plot(density(subset(x, ssizelor == 2000 & stempbfkl == 1)$maplor), col="blue")
lines(density(subset(x, ssizelor == 2000 & stempbfkl == 1)$mapbfkl), col="black") 

# n = 1000
plot(density(subset(x, ssizelor == 1000 & stempbfkl == 1)$maplor), col="blue")
lines(density(subset(x, ssizelor == 1000 & stempbfkl == 1)$mapbfkl), col="black")

# n = 500
plot(density(subset(x, ssizelor == 500 & stempbfkl == 1)$maplor), col="blue")
lines(density(subset(x, ssizelor == 500 & stempbfkl == 1)$mapbfkl), col="black") 

# n = 200
plot(density(subset(x, ssizelor == 200 & stempbfkl == 1)$maplor), col="blue")
lines(density(subset(x, ssizelor == 200 & stempbfkl == 1)$mapbfkl), col="black") 

# n = 100
plot(density(subset(x, ssizelor == 100 & stempbfkl == 1)$maplor), col="blue")
lines(density(subset(x, ssizelor == 100 & stempbfkl == 1)$mapbfkl), col="black") 

# Cross-tabulate method against graph size on each side of the merged table.
table(x$methodbfkl, x$ssizebfkl)
table(x$methodlor, x$ssizelor)

# Same kind of density, but taken from the unmerged table m (not restricted to
# simulated graphs): Lorentz, dim 2, non-discrete, ssize 100, stemp 1.
plot(density(subset(m, ssize == 100 & stemp == 1 & method=="lorentz" & dim==2 & discrete==0)$map), col="blue")

# comparison of diff in mr
#
# Three side-by-side panels (stempbfkl 1, 4, 7) written to one PDF. Each panel
# overlays the density of diff_mr for n = 2000, 1000 and 500; the n = 200 and
# n = 100 curves are not drawn. Only the y-axis limit differs between panels.
pdf("~/densities_MR.pdf", 30, 7)
par(mfrow = c(1, 3))
mr_sizes <- c(2000, 1000, 500)
mr_colours <- c("blue", "black", "red")
mr_panels <- list(
  list(temp = 1, title = "comparison of -log(MR), T=0.1", ylim = c(0, 1.1)),
  list(temp = 4, title = "comparison of -log(MR), T=0.4", ylim = c(0, 2.0)),
  list(temp = 7, title = "comparison of -log(MR), T=0.7", ylim = c(0, 3.2))
)
for (panel in mr_panels) {
  at_temp <- subset(x, stempbfkl == panel$temp)
  for (k in seq_along(mr_sizes)) {
    curve_k <- density(subset(at_temp, ssizelor == mr_sizes[k])$diff_mr)
    if (k == 1) {
      plot(curve_k, main = panel$title, col = mr_colours[k], xlab = NA,
           ylim = panel$ylim)
    } else {
      lines(curve_k, col = mr_colours[k])
    }
  }
  abline(v = 0)
  legend("topright", cex = 2.5, legend = paste0("n=", mr_sizes),
         col = mr_colours, lty = 1)
}
dev.off()


# comparison of diff in greedy
#
# Three side-by-side panels (stempbfkl 1, 4, 7) written to one PDF. Each panel
# overlays the density of diff_greedy for n = 2000, 1000 and 500; the n = 200
# and n = 100 curves are not drawn. All panels share ylim 0..13.
pdf("~/densities_greedy.pdf", 30, 7)
par(mfrow = c(1, 3))
greedy_panels <- list(
  list(temp = 1, title = "comparison of greedy success, T=0.1",
       xlim = c(-0.25, 0.2)),
  list(temp = 4, title = "comparison of greedy success, T=0.4",
       xlim = c(-0.1, 0.2)),
  list(temp = 7, title = "comparison of greedy success, T=0.7",
       xlim = c(-0.1, 0.2))
)
for (panel in greedy_panels) {
  at_temp <- subset(x, stempbfkl == panel$temp)
  plot(density(subset(at_temp, ssizelor == 2000)$diff_greedy),
       main = panel$title, col = "blue", xlab = NA,
       xlim = panel$xlim, ylim = c(0, 13))
  lines(density(subset(at_temp, ssizelor == 1000)$diff_greedy), col = "black")
  lines(density(subset(at_temp, ssizelor == 500)$diff_greedy), col = "red")
  abline(v = 0)
  legend("topright", cex = 2.5, legend = c("n=2000", "n=1000", "n=500"),
         col = c("blue", "black", "red"), lty = 1)
}
dev.off()

# comparison of diff in stretch
#
# Three side-by-side panels (stempbfkl 1, 4, 7) written to one PDF. Each panel
# overlays the density of diff_stretch for n = 2000, 1000 and 500; the n = 200
# and n = 100 curves are not drawn. Only the first panel fixes the x range.
pdf("~/densities_stretch.pdf", 30, 7)
par(mfrow = c(1, 3))
stretch_sizes <- c(2000, 1000, 500)
stretch_colours <- c("blue", "black", "red")
stretch_panels <- list(
  list(temp = 1, title = "comparison of -log(stretch), T=0.1",
       ylim = c(0, 13), xlim = c(-0.15, 0.15)),
  list(temp = 4, title = "comparison of -log(stretch), T=0.4",
       ylim = c(0, 15)),
  list(temp = 7, title = "comparison of -log(stretch), T=0.7",
       ylim = c(0, 15))
)
for (panel in stretch_panels) {
  at_temp <- subset(x, stempbfkl == panel$temp)
  for (k in seq_along(stretch_sizes)) {
    curve_k <- density(
      subset(at_temp, ssizelor == stretch_sizes[k])$diff_stretch
    )
    if (k == 1) {
      # a missing xlim entry is NULL, which leaves the axis range automatic
      plot(curve_k, main = panel$title, col = stretch_colours[k], xlab = NA,
           ylim = panel$ylim, xlim = panel$xlim)
    } else {
      lines(curve_k, col = stretch_colours[k])
    }
  }
  abline(v = 0)
  legend("topright", cex = 2.5, legend = paste0("n=", stretch_sizes),
         col = stretch_colours, lty = 1)
}
dev.off()


# regressions

set.seed(17052023)

# For every metric add a 0/1 factor bfklwins_<metric> that is 1 where the
# corresponding diff_<metric> column is negative.
for (metric in c("map", "mr", "greedy", "stretch")) {
  diff_values <- x[[paste0("diff_", metric)]]
  x[[paste0("bfklwins_", metric)]] <- as.factor(ifelse(diff_values < 0, 1, 0))
}

# Win counts per stempbfkl value, first for MAP and then for MR.
for (metric in c("map", "mr")) {
  wins <- x[[paste0("bfklwins_", metric)]]
  for (temp_code in c(1, 4, 7)) {
    print(table(wins[which(x$stempbfkl == temp_code)]))
  }
}

# The regressions only use graphs with ssizebfkl of at least 500.
xlog <- subset(x, ssizebfkl >= 500)

library(caret)
# 20-fold cross-validation, shared by every train() call in this script.
train_control <- trainControl(method = "cv", number = 20)

# Cross-validated logistic regression of the MAP win indicator on temperature
# plus the nbfkl and mbfkl columns.
kfold_train <- train(bfklwins_map ~ as.factor(stempbfkl) + nbfkl + mbfkl,
                     data = xlog, method = "glm", family = binomial,
                     trControl = train_control)

print(kfold_train)

# Same response, with graph size entered as a factor instead of nbfkl/mbfkl.
kfold_train <- train(bfklwins_map ~ as.factor(stempbfkl) + as.factor(ssizebfkl),
                     data = xlog, method = "glm", family = binomial,
                     trControl = train_control)

print(kfold_train)

# The model containing nbfkl and mbfkl is kept under its own name so that the
# likelihood-ratio test below is run on it. Previously `reg` had already been
# overwritten by the size-factor model when lrtest() was called, and removing
# nbfkl/mbfkl from a formula that does not contain them leaves it unchanged.
reg_nm <- glm(bfklwins_map ~ as.factor(stempbfkl) + nbfkl + mbfkl,
              data = xlog, family = binomial)
summary(reg_nm)

reg <- glm(bfklwins_map ~ as.factor(stempbfkl) + as.factor(ssizebfkl),
           data = xlog, family = binomial)
summary(reg)

library(lmtest)
# Likelihood-ratio test of the full model against the one without nbfkl/mbfkl.
lrtest(reg_nm, . ~ . - nbfkl - mbfkl)

# for mr
#
# Logistic regression of the MR win indicator (bfklwins_mr) on temperature and
# graph size, both entered as factors, fitted on xlog. The variant with
# nbfkl + mbfkl as predictors is disabled below.
#kfold_train <- train(bfklwins_mr~as.factor(stempbfkl) + nbfkl + mbfkl, data=xlog, 
#                     method='glm', family=binomial, trControl=train_control)

#print(kfold_train)

# Cross-validated fit; uses the train_control object created earlier in the
# script (20-fold CV).
kfold_train <- train(bfklwins_mr~as.factor(stempbfkl) + as.factor(ssizebfkl), data=xlog, 
                     method='glm', family=binomial, trControl=train_control)

print(kfold_train)

#reg <- glm(bfklwins_mr~as.factor(stempbfkl) + nbfkl + mbfkl, data=xlog, family=binomial)
#summary(reg)

# Plain glm fit of the same formula on all of xlog, for the coefficient table.
# This overwrites the `reg` object of the preceding section.
reg <- glm(bfklwins_mr~as.factor(stempbfkl) + as.factor(ssizebfkl), data=xlog, family=binomial)
summary(reg)

# for greedy
#
# Logistic regression of the greedy-success win indicator (bfklwins_greedy) on
# temperature and graph size, both entered as factors, fitted on xlog.
# caret is attached and train_control is rebuilt here with the same settings
# (20-fold CV) that the script already used for the MAP section.
library(caret)
train_control <- trainControl(method='cv', number=20)

# Disabled variant with nbfkl + mbfkl as predictors.
#kfold_train <- train(bfklwins_greedy~as.factor(stempbfkl) + nbfkl + mbfkl, data=xlog, 
#                    method='glm', family=binomial, trControl=train_control)
#print(kfold_train)

# Cross-validated fit.
kfold_train <- train(bfklwins_greedy~as.factor(stempbfkl) + as.factor(ssizebfkl), data=xlog, 
                     method='glm', family=binomial, trControl=train_control)

print(kfold_train)

#reg <- glm(bfklwins_greedy~as.factor(stempbfkl) + nbfkl + mbfkl, data=xlog, family=binomial)
#summary(reg)

# Plain glm fit of the same formula on all of xlog, for the coefficient table.
# This overwrites the `reg` object of the preceding section.
reg <- glm(bfklwins_greedy~as.factor(stempbfkl) + as.factor(ssizebfkl), data=xlog, family=binomial)
summary(reg)

# for stretch
#
# Logistic regression of the stretch win indicator (bfklwins_stretch) on
# temperature and graph size, both entered as factors, fitted on xlog.
# caret is attached and train_control is rebuilt here with the same settings
# (20-fold CV) that the script already used for the MAP section.
library(caret)
train_control <- trainControl(method='cv', number=20)

# NOTE(review): the disabled lines in this section still name bfklwins_greedy,
# not bfklwins_stretch; adjust the response before re-enabling them.
#kfold_train <- train(bfklwins_greedy~as.factor(stempbfkl) + nbfkl + mbfkl, data=xlog, 
#                    method='glm', family=binomial, trControl=train_control)
#print(kfold_train)

# Cross-validated fit.
kfold_train <- train(bfklwins_stretch~as.factor(stempbfkl) + as.factor(ssizebfkl), data=xlog, 
                     method='glm', family=binomial, trControl=train_control)

print(kfold_train)

#reg <- glm(bfklwins_greedy~as.factor(stempbfkl) + nbfkl + mbfkl, data=xlog, family=binomial)
#summary(reg)

# Plain glm fit of the same formula on all of xlog, for the coefficient table.
# This overwrites the `reg` object of the preceding section.
reg <- glm(bfklwins_stretch~as.factor(stempbfkl) + as.factor(ssizebfkl), data=xlog, family=binomial)
summary(reg)



# scatterplot for time vs quality

# change to precise

# timeloss: Lorentz running time divided by the precise BFKL time.
xlog$timeloss <- with(xlog, timelor / precise_time)

# Rows where bfklwins_map is 0, with the percentage by which maplor exceeds
# mapbfkl.
lorbet_map <- subset(xlog, bfklwins_map == 0)
lorbet_map$pergain <- with(lorbet_map, 100 * ((maplor / mapbfkl) - 1))

# For each temperature: one scatterplot written to its own PDF, followed by a
# Kendall correlation test of the same two columns. The cor.test() arguments
# are written out in full because they are echoed in the printed "data:" line.

gain_t1 <- subset(lorbet_map, stempbfkl == 1)
pdf("~/gain_MAP01.pdf", 10, 7)
plot(gain_t1$pergain, gain_t1$timeloss,
     xlab = "Percentage gain in quality",
     ylab = "How many times longer it takes",
     main = "T=0.1")
dev.off()

cor.test(
  subset(lorbet_map, stempbfkl == 1)$pergain,
  subset(lorbet_map, stempbfkl == 1)$timeloss,
  method = "kendall"
)

gain_t4 <- subset(lorbet_map, stempbfkl == 4)
pdf("~/gain_MAP04.pdf", 10, 7)
plot(gain_t4$pergain, gain_t4$timeloss,
     xlab = "Percentage gain in quality",
     ylab = "How many times longer it takes",
     main = "T=0.4")
dev.off()

cor.test(
  subset(lorbet_map, stempbfkl == 4)$pergain,
  subset(lorbet_map, stempbfkl == 4)$timeloss,
  method = "kendall"
)

gain_t7 <- subset(lorbet_map, stempbfkl == 7)
pdf("~/gain_MAP07.pdf", 10, 7)
plot(gain_t7$pergain, gain_t7$timeloss,
     xlab = "Percentage gain in quality",
     ylab = "How many times longer it takes",
     main = "T=0.7")
dev.off()

cor.test(
  subset(lorbet_map, stempbfkl == 7)$pergain,
  subset(lorbet_map, stempbfkl == 7)$timeloss,
  method = "kendall"
)

# table with real graphs
#
# Re-read the results table and keep the non-simulated rows with sid == 0,
# without duplicate rows, excluding the "mammal" graph and rows named "X".
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
q <- read.table("../tables//statistical-data.csv", sep = ";",
                as.is = TRUE, header = TRUE)
q <- unique(subset(q, graph != "simulated" & sid == 0))
q <- subset(q, graph != "mammal" & name != "X")

# Fix the order in which graphs appear on the plot axes. A graph name that is
# not listed here becomes NA.
q$graph <- factor(q$graph, levels = c(
  "astroph", "condmat", "grqc", "hepph", "facebook", "yeast", "diseasome",
  "celegans", "human1", "drosophila1", "mouse3", "noun", "verbf", "acm",
  "mesh", "tetrapoda", "csphd"
))

library(ggplot2)

# Build one "metric per graph" point plot; every figure below shares this
# layout and differs only in data, y mapping, y label and theme.
#   data       - data frame with columns graph, name and the plotted metric
#   mapping    - aes() giving x and y
#   y_label    - y-axis label
#   plot_theme - theme added last (defaults to the one used for the PDFs)
# Returns the ggplot object without drawing it.
real_world_plot <- function(data, mapping, y_label,
                            plot_theme = theme_minimal(base_size = 15)) {
  ggplot(data, mapping) +
    geom_point(aes(shape = name), size = 3) +
    scale_shape_manual(values = c(19, 15, 5, 6, 3, 4, 8, 1, 2, 7)) +
    xlab("graph") +
    ylab(y_label) +
    scale_x_discrete(guide = guide_axis(angle = 90)) +
    plot_theme
}

# Every plot is drawn with an explicit print(): a ggplot object is rendered
# only when printed, and top-level auto-printing does not happen when the
# script is run through source(), which would leave the PDFs empty.

pdf("~/real-world_MAP.pdf", 10, 7)
print(real_world_plot(subset(q, name != "X"), aes(x = graph, y = map), "MAP"))
dev.off()

# MR on its raw scale, drawn on the current device only (not saved to a file).
print(real_world_plot(subset(q, name != "X"), aes(x = graph, y = mr), "MR",
                      plot_theme = theme_minimal()))

pdf("~/real-world_MR.pdf", 10, 7)
print(real_world_plot(subset(q, name != "X"), aes(x = graph, y = log(1 / mr)),
                      "-log(MR)"))
dev.off()

# The remaining two figures leave out the graphs listed as hierarchies and the
# rows whose stretch is 0.
hierarchies <- c("noun", "verbf", "acm", "mesh", "tetrapoda", "csphd")
q <- subset(q, !(q$graph %in% hierarchies))

pdf("~/real-world_greedy.pdf", 10, 7)
print(real_world_plot(subset(q, q$stretch != 0), aes(x = graph, y = success),
                      "greedy success"))
dev.off()

pdf("~/real-world_stretch.pdf", 10, 7)
print(real_world_plot(subset(q, q$stretch != 0),
                      aes(x = graph, y = log(1 / stretch)), "-log(stretch)"))
dev.off()


