{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## A1A Dataset sensitivity c0"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "from numpy import linalg as la\n",
    "from methods import SGD, SGD_decr, SPS_decr\n",
    "import matplotlib.pyplot as plt\n",
    "from scipy.signal import savgol_filter\n",
    "from sklearn.datasets import load_svmlight_file\n",
    "\n",
    "import sys\n",
    "import math\n",
    "import seaborn as sns\n",
    "import random\n",
    "from tqdm import tqdm\n",
    "%config InlineBackend.figure_format = 'svg'\n",
    "\n",
    "markers = [\"v\",\"^\",\"<\",\">\",\"o\",\"s\",\"p\",\"P\",\"*\"]\n",
    "colors = sns.color_palette(\"colorblind\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Problem Definition\n",
    "\n",
    "$$ \n",
    "f(x) = \\frac{1}{2 n} \\sum_{i=1}^n \\log (1 + exp(-y_i a_i^T x)) + \\frac{\\lambda}{2} \\|x\\|^2_2\n",
    "$$ \n",
    "$$\n",
    "f_i(x) = \\frac{1}{2} \\log(1+exp(-y_i a^T_i x)) + \\frac{\\lambda}{2} \\|x\\|^2\n",
    "$$\n",
    "$$\n",
    "\\nabla f_i(x) = \\frac{1}{2} \\frac{-y_i}{exp(y_i a_i^T x) + 1} a_i + \\lambda x\n",
    "$$\n",
    "$$\n",
    "\\nabla^2 f_i(x) = \\frac{1}{2} \\frac{exp(y_i a_i^T x)y_i^2}{(1+exp(y_i a_i^T x))^2} a_ia_i^T + \\lambda \n",
    "$$"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_data():\n",
    "    data = load_svmlight_file(\"datasets/a1a.txt\")\n",
    "    return data[0], data[1]\n",
    "\n",
    "A, y = get_data()\n",
    "\n",
    "A = A.todense()\n",
    "A = np.array(A)\n",
    "\n",
    "n = A.shape[0]\n",
    "d = A.shape[1]\n",
    "\n",
    "lambd = 0.01#1/n\n",
    "mu =  lambd\n",
    "L = la.norm(A, axis=1, ord=2)**2/8 + lambd\n",
    "\n",
    "#initialization\n",
    "np.random.seed(10)\n",
    "x0=0.1*np.random.randn(d,) #starting position\n",
    "\n",
    "def cost(x, random_ind):\n",
    "    batch_size = len(random_ind)\n",
    "    f = np.sum(np.log(1+np.exp(-np.dot(A[random_ind], x) * y[random_ind])))/(2*batch_size) + (lambd/2)*np.sum(x**2)\n",
    "    return f\n",
    "\n",
    "def grad(x, random_ind):\n",
    "    batch_size = len(random_ind)\n",
    "    num = -y[random_ind]\n",
    "    den = (1 + np.exp(np.dot(A[random_ind], x) * y[random_ind]))\n",
    "    c = num/den\n",
    "    c2 = np.dot(c.T,A[random_ind])\n",
    "    final_grad = (c2)/(2*batch_size) + lambd * x\n",
    "    return final_grad\n",
    "\n",
    "def hess(x):\n",
    "    hess_res = lambd*np.identity(d)\n",
    "    for i in range(n):\n",
    "        update_hess = (0.5/n) * (y[i]**2) * (np.exp(y[i]*np.dot(A[i],x))) * np.outer(A[i],A[i]) / (1+np.exp(y[i]*np.dot(A[i],x)))**2\n",
    "        hess_res = hess_res + update_hess\n",
    "    eigs,_ = la.eig(hess_res)\n",
    "    return np.min(eigs),np.max(eigs)\n",
    "\n",
    "\n",
    "\n",
    "f0 = cost(x0, range(n))\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### GD solution"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#full batch\n",
    "batch_size = n\n",
    "K = 50000 #number of iterations\n",
    "gamma = 1\n",
    "_,ff_star,_ = SGD(cost, grad, hess, K, gamma, x0, batch_size, n)\n",
    "f_sol = ff_star[-1]\n",
    "\n",
    "#plotting angles\n",
    "fig, ax = plt.subplots()\n",
    "plt.xlabel('# iterations')\n",
    "plt.ylabel('cost GD')\n",
    "plt.loglog(ff_star,'--',color = 'k',linewidth=2,label = 'Full Batch GD', markevery=2000)\n",
    "plt.legend(fontsize=12)\n",
    "plt.title(r\"Toy Problem, d=\"+ str(d)+', $\\lambda$={:.2f}'.format(lambd))\n",
    "ax.grid()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Running Optimizers"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "K_record_times = np.power(np.arange(0,45),3)\n",
    "K=K_record_times[-1]\n",
    "nexp = 10\n",
    "compute_hess = True\n",
    "\n",
    "#running optimizers\n",
    "methods=[]\n",
    "f_hist=[]\n",
    "gammas_hist = []\n",
    "mu_hist = []\n",
    "L_hist = []\n",
    "\n",
    "#sampling batches at the beginning\n",
    "bs = 20\n",
    "\n",
    "\n",
    "c = 1\n",
    "gamma_max = 2\n",
    "decr = 'sqrt'\n",
    "name, f, gammas_rec = SPS_decr(cost,grad,hess,nexp, K_record_times, compute_hess, c, decr, gamma_max, x0, bs, n)\n",
    "methods.append(name)\n",
    "f_hist.append(f)\n",
    "gammas_hist.append(gammas_rec)\n",
    "print('done')\n",
    "\n",
    "\n",
    "c = 1\n",
    "gamma_max = 5\n",
    "decr = 'sqrt'\n",
    "name, f, gammas_rec = SPS_decr(cost,grad,hess,nexp, K_record_times, compute_hess, c, decr, gamma_max, x0, bs, n)\n",
    "methods.append(name)\n",
    "f_hist.append(f)\n",
    "gammas_hist.append(gammas_rec)\n",
    "print('done')\n",
    "\n",
    "\n",
    "c = 1\n",
    "gamma_max = 10\n",
    "decr = 'sqrt'\n",
    "name, f, gammas_rec = SPS_decr(cost,grad,hess,nexp, K_record_times, compute_hess, c, decr, gamma_max, x0, bs, n)\n",
    "methods.append(name)\n",
    "f_hist.append(f)\n",
    "gammas_hist.append(gammas_rec)\n",
    "print('done')\n",
    "\n",
    "\n",
    "c = 1\n",
    "gamma_max = 100\n",
    "decr = 'sqrt'\n",
    "name, f, gammas_rec = SPS_decr(cost,grad,hess,nexp, K_record_times, compute_hess, c, decr, gamma_max, x0, bs, n)\n",
    "methods.append(name)\n",
    "f_hist.append(f)\n",
    "gammas_hist.append(gammas_rec)\n",
    "print('done')\n",
    "\n",
    "\n",
    "c = 1\n",
    "gamma_max = 1000\n",
    "decr = 'sqrt'\n",
    "name, f, gammas_rec = SPS_decr(cost,grad,hess,nexp, K_record_times, compute_hess, c, decr, gamma_max, x0, bs, n)\n",
    "methods.append(name)\n",
    "f_hist.append(f)\n",
    "gammas_hist.append(gammas_rec)\n",
    "print('done')\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Loss"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "scale = 1\n",
    "fig, ax = plt.subplots(figsize=(4,5))\n",
    "markers = [\"v\",\"o\",\"s\",\"p\",\"P\",\"*\"]\n",
    "colors = sns.color_palette('colorblind')\n",
    "for i in range(len(methods)):\n",
    "    mean_f = np.mean(f_hist[i]-f_sol,1)\n",
    "    std_f = np.std(f_hist[i], 1)\n",
    "    plt.fill_between(K_record_times,mean_f-scale*std_f,mean_f+scale*std_f , alpha=0.2, fc=colors[i])\n",
    "    plt.plot(K_record_times,mean_f,color = colors[i],linewidth=3,label = methods[i], markevery=[33], marker = markers[i],markersize = 10,markeredgewidth=1.5, markeredgecolor=[0,0,0,0.6])\n",
    "\n",
    "plt.yscale(\"log\")\n",
    "plt.xscale(\"linear\")\n",
    "#plt.ylim([0.0025,30])\n",
    "plt.legend(fontsize=12)\n",
    "plt.xlabel('# iterations $(k)$',fontsize=12)\n",
    "plt.ylabel('$f(x^k)-f^*$',fontsize=12)\n",
    "fig.savefig('Toy_Logistic_regression_SPS_sensitivity_gamma_b_2',dpi=200, bbox_inches='tight')\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
