library(ggplot2)
library(latex2exp)
library(pracma)
library(tilting)
library(matrixStats)

#' Single-sample stochastic gradient descent with periodic snapshots
#'
#' At every iteration one row of `x` is drawn uniformly at random (one call to
#' `sample(n, 1)` per iteration) and the iterate is moved against the gradient
#' of that single observation with step size `eta / i^alpha`.
#'
#' @param x Numeric matrix of covariates, one observation per row.
#' @param y Response vector, one entry per row of `x`. For `model = "log"` the
#'   gradient is written for labels coded 0/1; for `model = "svm"` the margin
#'   `y * <theta, x>` is used, i.e. labels are expected to be coded -1/+1.
#' @param model One of `"lm"` (squared-error loss), `"log"` (logistic loss) or
#'   `"svm"` (subgradient of `0.5 * ||theta||^2 + C * hinge`).
#' @param t1 Number of SGD iterations; `0` performs no update.
#' @param eta Base step size (scalar).
#' @param alpha Decay exponent of the step size; `0` gives a constant step.
#' @param C Hinge-loss weight. Only evaluated, and therefore only required,
#'   when `model = "svm"`.
#' @param theta0 Numeric vector holding the starting point.
#' @param nsave Positive snapshot period: the iterate is stored whenever
#'   `i %% nsave == 0`.
#' @return A list with
#'   * `theta`: matrix whose first row is `theta0` and whose row `j + 1` is the
#'     j-th stored snapshot. It is always a matrix, even when no snapshot was
#'     taken.
#'   * `step_size`: vector whose first entry is `eta`, followed by the step
#'     size that was in use at each stored snapshot.
my_sgd <- function(x, y, model, t1, eta, alpha, C, theta0, nsave) {
  stopifnot(is.numeric(nsave), length(nsave) == 1L, nsave > 0)
  n <- nrow(x)

  # Per-observation gradient, chosen once up front so that an unsupported
  # model fails immediately with a readable message.
  get_gradient <- switch(
    model,
    lm = function(t, x1, y1) x1 * (sum(t * x1) - y1),
    # Labels coded 0/1 (the -1/+1 form would be
    # -y1 * x1 / (1 + exp(y1 * sum(t * x1)))).
    log = function(t, x1, y1) -y1 * x1 + x1 / (1 + exp(-sum(t * x1))),
    svm = {
      if (missing(C)) {
        stop("'C' must be supplied when model = \"svm\"", call. = FALSE)
      }
      function(t, x1, y1) {
        # Outside the margin only the ridge term contributes.
        if (y1 * sum(t * x1) >= 1) t else t - C * y1 * x1
      }
    },
    stop("unknown model '", model,
         "': expected \"lm\", \"log\" or \"svm\"", call. = FALSE)
  )

  # seq_len() (unlike 1:t1) yields no iterations at all for t1 = 0.
  n_iter <- max(0L, as.integer(t1))
  iters <- seq_len(n_iter)

  # Preallocate storage: the starting point plus one slot per snapshot.
  n_saved <- sum(iters %% nsave == 0)
  theta <- matrix(NA_real_, nrow = n_saved + 1L, ncol = length(theta0))
  theta[1L, ] <- theta0
  step_size <- numeric(n_saved + 1L)
  step_size[1L] <- eta

  theta_cur <- theta0
  slot <- 1L
  for (i in iters) {
    idx <- sample(n, 1)
    stepsize <- eta / (i^alpha)
    theta_cur <- theta_cur - stepsize * get_gradient(theta_cur, x[idx, ], y[idx])
    if (i %% nsave == 0) {
      slot <- slot + 1L
      theta[slot, ] <- theta_cur
      step_size[slot] <- stepsize
    }
  }

  list(theta = theta, step_size = step_size)
}


# Histogram Introduction
# For two constant learning rates, draw B pairs of independent SGD runs of
# l iterations from the same start and record the cosine similarity between
# their net displacements (start minus iterate after l steps).
n <- 1000
p <- 10
theta_star <- rep(1, p)
theta0 <- rep(0.5, p)
sigma <- 1
x <- matrix(rnorm(n * p, sd = 1), n, p)
y <- as.numeric(x %*% theta_star + rnorm(n, 0, sigma))
model <- "lm"
l <- 100
eta1 <- 1
eta2 <- 0.1
B <- 1000

# All B replications for eta1 are simulated before any for eta2, so the
# random-number stream is consumed in the same order as two separate loops.
dk_by_eta <- lapply(c(eta1, eta2), function(step) {
  vapply(seq_len(B), function(b) {
    run_a <- my_sgd(x, y, model = model, t1 = l, eta = step, alpha = 0,
                    theta0 = theta0, nsave = l)
    run_b <- my_sgd(x, y, model = model, t1 = l, eta = step, alpha = 0,
                    theta0 = theta0, nsave = l)
    # Row 1 is the starting point, row 2 the iterate saved after l steps.
    u <- run_a$theta[1, ] - run_a$theta[2, ]
    v <- run_b$theta[1, ] - run_b$theta[2, ]
    dot(u, v) / (norm(u, type = "2") * norm(v, type = "2"))
  }, numeric(1))
})
dk1 <- dk_by_eta[[1]]
dk2 <- dk_by_eta[[2]]

dat <- data.frame(
  d = c(dk2, dk1),
  eta = rep(c("0.1", "1"), each = B)
)
my.cols <- c("#d95f02", "#7570b3")

ggplot(dat, aes(x = d, fill = eta)) +
  geom_histogram(
    # Both groups hold B draws, so 2 * count / total is the relative
    # frequency within each group.
    aes(y = 2 * ..count.. / sum(..count..)),
    bins = 30,
    position = "dodge",
    size = 0.5,
    colour = "black"
  ) +
  labs(y = "Frequency", fill = TeX("learning rate")) +
  scale_fill_manual(values = my.cols) +
  theme_bw() +
  theme(
    plot.title = element_text(hjust = 0.5, size = 27),
    legend.position = c(0.15, 0.85),
    legend.background = element_rect(colour = "black"),
    legend.title = element_text(size = 18),
    legend.key.size = unit(1, "cm"),
    legend.text = element_text(size = 18),
    axis.text = element_text(size = 16),
    axis.title = element_text(size = 18),
    axis.title.x = element_blank()
  ) +
  xlim(-1, 1)

#ggsave('histogram_introduction.png', width = 20, height = 15, units = "cm", dpi = 300)



#####################################################
#####################################################
# Histograms for Theoretical Guarantees
# linear, fix eta
# One long SGD path is compared with fresh runs restarted from its snapshots
# at iteration t2 and at iteration t3. Qt1 / Qt2 are the cosine similarities
# between the displacement over the k-th block of l steps after the restart
# point on the long path and on the restarted run.
n <- 1000
p <- 10
k <- 2
t2 <- 0
t3 <- 5000
theta_star <- rep(1, p)
theta0 <- rep(0, p)
sigma <- 1
x <- matrix(rnorm(n * p, sd = 1), n, p)
y <- as.numeric(x %*% theta_star + rnorm(n, 0, sigma))
model <- "lm"
l <- 50
eta <- 0.01
B <- 1000

Qt <- vapply(seq_len(B), function(b) {
  # Snapshots are taken every l steps, so row j + 1 holds iteration j * l.
  path <- my_sgd(x, y, model = model, t1 = t3 + k * l, eta = eta, alpha = 0,
                 theta0 = theta0, nsave = l)$theta
  from_t2 <- my_sgd(x, y, model = model, t1 = k * l, eta = eta, alpha = 0,
                    theta0 = path[t2 / l + 1, ], nsave = l)$theta
  from_t3 <- my_sgd(x, y, model = model, t1 = k * l, eta = eta, alpha = 0,
                    theta0 = path[t3 / l + 1, ], nsave = l)$theta

  u1 <- path[t2 / l + k, ] - path[t2 / l + k + 1, ]
  v1 <- from_t2[k, ] - from_t2[k + 1, ]
  cos_t2 <- dot(u1, v1) / (norm(u1, type = "2") * norm(v1, type = "2"))

  u2 <- path[t3 / l + k, ] - path[t3 / l + k + 1, ]
  v2 <- from_t3[k, ] - from_t3[k + 1, ]
  cos_t3 <- dot(u2, v2) / (norm(u2, type = "2") * norm(v2, type = "2"))

  # Progress indicator every 100 replications.
  if (b %% 100 == 0) {
    cat(b, '\n')
  }
  c(cos_t2, cos_t3)
}, numeric(2))
Qt1 <- Qt[1, ]
Qt2 <- Qt[2, ]

dat <- data.frame(
  d = c(Qt1, Qt2),
  k = rep(c("Qt1", "Qt2"), each = B)
)

my.labs <- list(TeX("0"), TeX("5000"))
ggplot(dat, aes(x = d, fill = k)) +
  geom_histogram(
    # Both groups hold B draws, so 2 * count / total is the relative
    # frequency within each group.
    aes(y = 2 * ..count.. / sum(..count..)),
    bins = 20,
    position = "dodge",
    size = 0.5,
    colour = "black"
  ) +
  scale_fill_manual(values = c("#d95f02", "#7570b3"), labels = my.labs) +
  labs(
    x = TeX("Normalized $Q_2$"),
    y = "Frequency",
    fill = TeX("  $ \\,  t_1$"),
    title = TeX("Linear Regression with $\\eta = 0.01$")
  ) +
  theme_bw() +
  theme(
    legend.position = c(0.15, 0.85),
    plot.title = element_text(hjust = 0.5, size = 27),
    legend.background = element_rect(colour = "black"),
    legend.title = element_text(size = 25),
    legend.key.size = unit(1, "cm"),
    legend.text = element_text(size = 20),
    axis.text = element_text(size = 16),
    axis.title = element_text(size = 18)
  ) +
  xlim(-1, 1)

#ggsave('hist_asymptotic_t_linear_normalized_thick.png', width = 18, height = 18, units = "cm", dpi = 300)


#####################################################
# logistic, fix eta
# Same experiment as the linear case, on 0/1 responses drawn from a logistic
# model: one long SGD path is compared with fresh runs restarted from its
# snapshots at iteration t2 and at iteration t3.
n <- 1000
p <- 10
k <- 2
t2 <- 0
t3 <- 5000
theta_star <- rep(1, p)
theta0 <- rep(0, p)
sigma <- 1
x <- matrix(rnorm(n * p, sd = 1), n, p)
pr <- 1 / (1 + exp(-(x %*% theta_star)))
y <- rbinom(n, 1, pr)
model <- "log"
l <- 100

eta <- 0.05
B <- 1000

Qt <- vapply(seq_len(B), function(b) {
  # Snapshots are taken every l steps, so row j + 1 holds iteration j * l.
  path <- my_sgd(x, y, model = model, t1 = t3 + k * l, eta = eta, alpha = 0,
                 theta0 = theta0, nsave = l)$theta
  from_t2 <- my_sgd(x, y, model = model, t1 = k * l, eta = eta, alpha = 0,
                    theta0 = path[t2 / l + 1, ], nsave = l)$theta
  from_t3 <- my_sgd(x, y, model = model, t1 = k * l, eta = eta, alpha = 0,
                    theta0 = path[t3 / l + 1, ], nsave = l)$theta

  u1 <- path[t2 / l + k, ] - path[t2 / l + k + 1, ]
  v1 <- from_t2[k, ] - from_t2[k + 1, ]
  cos_t2 <- dot(u1, v1) / (norm(u1, type = "2") * norm(v1, type = "2"))

  u2 <- path[t3 / l + k, ] - path[t3 / l + k + 1, ]
  v2 <- from_t3[k, ] - from_t3[k + 1, ]
  cos_t3 <- dot(u2, v2) / (norm(u2, type = "2") * norm(v2, type = "2"))

  # Progress indicator every 100 replications.
  if (b %% 100 == 0) {
    cat(b, '\n')
  }
  c(cos_t2, cos_t3)
}, numeric(2))
Qt1 <- Qt[1, ]
Qt2 <- Qt[2, ]

dat <- data.frame(
  d = c(Qt1, Qt2),
  k = rep(c("Qt1", "Qt2"), each = B)
)

my.labs <- list(TeX("0"), TeX("5000"))
ggplot(dat, aes(x = d, fill = k)) +
  geom_histogram(
    # Both groups hold B draws, so 2 * count / total is the relative
    # frequency within each group.
    aes(y = 2 * ..count.. / sum(..count..)),
    bins = 20,
    position = "dodge",
    size = 0.5,
    colour = "black"
  ) +
  scale_fill_manual(values = c("#d95f02", "#7570b3"), labels = my.labs) +
  labs(
    x = TeX("Normalized $Q_2$"),
    y = "Frequency",
    fill = TeX("  $ \\,  t_1$"),
    title = TeX("Logistic Regression with $\\eta = 0.05$")
  ) +
  theme_bw() +
  theme(
    legend.position = c(0.15, 0.85),
    plot.title = element_text(hjust = 0.5, size = 27),
    legend.background = element_rect(colour = "black"),
    legend.title = element_text(size = 25),
    legend.key.size = unit(1, "cm"),
    legend.text = element_text(size = 20),
    axis.text = element_text(size = 16),
    axis.title = element_text(size = 18)
  ) +
  xlim(-1, 1)

#ggsave('hist_asymptotic_t_logistic_normalized_thick.png', width = 18, height = 18, units = "cm", dpi = 300)


#####################################################
# linear, fix t1
# For each of two learning rates: run n0 warm-up iterations, then launch two
# independent runs of k * l iterations from the warm-up endpoint and record
# the cosine similarity between their displacements over the k-th block of
# l steps.
n <- 1000
p <- 10
k <- 2
n0 <- 50
theta_star <- rep(1, p)
theta0 <- rep(0, p)
sigma <- 1
x <- matrix(rnorm(n * p, sd = 1), n, p)
y <- as.numeric(x %*% theta_star + rnorm(n, 0, sigma))
model <- "lm"
l <- 50
eta1 <- 0.05
eta2 <- 0.01
B <- 1000

# All B replications for eta1 are simulated before any for eta2, so the
# random-number stream is consumed in the same order as two separate loops.
dk_by_eta <- lapply(c(eta1, eta2), function(step) {
  vapply(seq_len(B), function(b) {
    warm_up <- my_sgd(x, y, model = model, t1 = n0, eta = step, alpha = 0,
                      theta0 = theta0, nsave = l)
    # Snapshot row n0 / l + 1 holds the iterate after n0 warm-up steps.
    start <- warm_up$theta[n0 / l + 1, ]
    run_a <- my_sgd(x, y, model = model, t1 = k * l, eta = step, alpha = 0,
                    theta0 = start, nsave = l)
    run_b <- my_sgd(x, y, model = model, t1 = k * l, eta = step, alpha = 0,
                    theta0 = start, nsave = l)
    u <- run_a$theta[k, ] - run_a$theta[k + 1, ]
    v <- run_b$theta[k, ] - run_b$theta[k + 1, ]
    dot(u, v) / (norm(u, type = "2") * norm(v, type = "2"))
  }, numeric(1))
})
dk1 <- dk_by_eta[[1]]
dk2 <- dk_by_eta[[2]]

dat <- data.frame(
  d = c(dk1, dk2),
  eta = rep(c("1", "2"), each = B)
)
my.cols <- c("#7570b3", "#d95f02")
my.labs <- c("0.05", "0.01")

ggplot(dat, aes(x = d, fill = eta)) +
  geom_histogram(
    # Both groups hold B draws, so 2 * count / total is the relative
    # frequency within each group.
    aes(y = 2 * ..count.. / sum(..count..)),
    bins = 20,
    position = "dodge",
    size = 0.5,
    colour = "black"
  ) +
  labs(
    x = TeX("Normalized $Q_2$"),
    y = "Frequency",
    fill = TeX("$\\;\\eta$"),
    title = TeX("Linear Regression with $t_1 = 50$")
  ) +
  scale_fill_manual(values = my.cols, labels = my.labs) +
  theme_bw() +
  theme(
    plot.title = element_text(hjust = 0.5, size = 27),
    legend.position = c(0.15, 0.85),
    legend.background = element_rect(colour = "black"),
    legend.title = element_text(size = 25),
    legend.key.size = unit(1, "cm"),
    legend.text = element_text(size = 20),
    axis.text = element_text(size = 16),
    axis.title = element_text(size = 18)
  ) +
  xlim(-1, 1)

#ggsave('hist_asymptotic_eta_linear_normalized_thick.png', width = 18, height = 18, units = "cm", dpi = 300)







#####################################################
# logistic, fix t1
# Logistic counterpart of the fixed-t1 experiment: for each of two learning
# rates, run n0 warm-up iterations, then launch two independent runs of
# k * l iterations from the warm-up endpoint and record the cosine similarity
# between their displacements over the k-th block of l steps.
n <- 1000
p <- 10
k <- 2
n0 <- 50
theta_star <- rep(1, p)
theta0 <- rep(0, p)
sigma <- 1
x <- matrix(rnorm(n * p, sd = 1), n, p)
pr <- 1 / (1 + exp(-(x %*% theta_star)))
y <- rbinom(n, 1, pr)
model <- "log"
l <- 100
eta1 <- 0.5
eta2 <- 0.05
B <- 1000

# All B replications for eta1 are simulated before any for eta2, so the
# random-number stream is consumed in the same order as two separate loops.
dk_by_eta <- lapply(c(eta1, eta2), function(step) {
  vapply(seq_len(B), function(b) {
    warm_up <- my_sgd(x, y, model = model, t1 = n0, eta = step, alpha = 0,
                      theta0 = theta0, nsave = n0)
    # The warm-up saves once, after n0 steps, so row 2 is its endpoint.
    start <- warm_up$theta[2, ]
    run_a <- my_sgd(x, y, model = model, t1 = k * l, eta = step, alpha = 0,
                    theta0 = start, nsave = l)
    run_b <- my_sgd(x, y, model = model, t1 = k * l, eta = step, alpha = 0,
                    theta0 = start, nsave = l)
    u <- run_a$theta[k, ] - run_a$theta[k + 1, ]
    v <- run_b$theta[k, ] - run_b$theta[k + 1, ]
    dot(u, v) / (norm(u, type = "2") * norm(v, type = "2"))
  }, numeric(1))
})
dk1 <- dk_by_eta[[1]]
dk2 <- dk_by_eta[[2]]

dat <- data.frame(
  d = c(dk1, dk2),
  eta = rep(c("1", "2"), each = B)
)
my.cols <- c("#7570b3", "#d95f02")
my.labs <- c("0.5", "0.05")

ggplot(dat, aes(x = d, fill = eta)) +
  geom_histogram(
    # Both groups hold B draws, so 2 * count / total is the relative
    # frequency within each group.
    aes(y = 2 * ..count.. / sum(..count..)),
    bins = 20,
    position = "dodge",
    size = 0.5,
    colour = "black"
  ) +
  labs(
    x = TeX("Normalized $Q_2$"),
    y = "Frequency",
    fill = TeX("$\\;\\eta$"),
    title = TeX("Logistic Regression with $t_1 = 50$")
  ) +
  scale_fill_manual(values = my.cols, labels = my.labs) +
  theme_bw() +
  theme(
    plot.title = element_text(hjust = 0.5, size = 27),
    legend.position = c(0.15, 0.85),
    legend.background = element_rect(colour = "black"),
    legend.title = element_text(size = 25),
    legend.key.size = unit(1, "cm"),
    legend.text = element_text(size = 20),
    axis.text = element_text(size = 16),
    axis.title = element_text(size = 18)
  ) +
  xlim(-1, 1)

#ggsave('hist_asymptotic_eta_logistic_normalized_thick.png', width = 18, height = 18, units = "cm", dpi = 300)

