{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "UU9bQTw8c2H6"
   },
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "from numpy import linalg as LA\n",
    "from sklearn import metrics\n",
    "from sklearn.utils import shuffle\n",
    "from sklearn import preprocessing\n",
    "from sklearn.model_selection import train_test_split\n",
    "from pmlb import fetch_data\n",
    "from pmlb import classification_dataset_names, regression_dataset_names\n",
    "import seaborn as sns\n",
    "\n",
    "EPS = 1e-6\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Zeroth and first order oracles"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "UJUNl-SStEqs"
   },
   "outputs": [],
   "source": [
    "#zeroth order oracle\n",
    "def f(xi,yi,opt_x):\n",
    "  #logistic regression\n",
    "  h_val=1.0/(1.0+np.exp(-np.dot(xi,opt_x)))\n",
    "  if h_val <= EPS:\n",
    "    h_val = EPS\n",
    "  elif h_val >= 1-EPS and h_val <= 1:\n",
    "    h_val = 1-EPS\n",
    "  return -yi*np.log(h_val)-(1-yi)*np.log(1-h_val)\n",
    "\n",
    "def grad_f(xi,yi,opt_x):\n",
    "  #logistic regression\n",
    "  h_val=1.0/(1.0+np.exp(-np.dot(xi,opt_x)))\n",
    "  return -(yi-h_val)* xi    \n",
    "\n",
    "def true_function_val(f,x,y,opt_x):\n",
    "  f_val=0.0\n",
    "  for ind in range(0,len(x)):\n",
    "    f_val=f_val+f(x[ind],y[ind],opt_x)\n",
    "  f_val=(1.0/len(x))*f_val\n",
    "  return f_val\n",
    "\n",
    "def validate(f, x_test,y_test,x_current):\n",
    "    n = len(x_test)\n",
    "    predicted_labels = (np.sign(x_test@x_current) + 1)/2\n",
    "    errors = np.abs(predicted_labels - y_test)\n",
    "    return 1 - np.mean(errors)\n",
    "    \n",
    "\n",
    "######### 1. zeroth and first order oracles with different samples #########\n",
    "# zeroth order oracle by sampling with specific sampling size\n",
    "def zeroth_oracle(f,x,y,opt_x,sample_size):\n",
    "  sample_list=np.random.choice(range(0,len(x)),sample_size,replace=False)\n",
    "  f_noisy_val=0.0\n",
    "  for i in range(0,sample_size):\n",
    "    ind=sample_list[i]\n",
    "    f_noisy_val=f_noisy_val+f(x[ind],y[ind],opt_x)\n",
    "  f_noisy_val=(1.0/sample_size)*f_noisy_val\n",
    "  return f_noisy_val\n",
    "\n",
    "#first order oracle by sampling with specific sampling size\n",
    "def first_oracle(f,grad_f,x,y,opt_x,subsample_num):\n",
    "  grad_list=np.random.choice(range(0,len(x)),subsample_num,replace=False)\n",
    "  grad_noisy=np.zeros(len(opt_x))\n",
    "  for ind in grad_list:\n",
    "    grad_noisy=grad_noisy+grad_f(x[ind],y[ind],opt_x)\n",
    "  grad_noisy=(1.0/subsample_num)*grad_noisy\n",
    "  return grad_noisy\n",
    "\n",
    "\n",
    "######## 2. same batch of samples for zeroth and first order oracle ##########\n",
    "\n",
    "def zeroth_oracle_epoch(f,x,y,opt_x,sample_size,start_ind):\n",
    "  sample_list=list(range(start_ind,start_ind+sample_size)) \n",
    "  f_noisy_val=0.0\n",
    "  for i in range(0,sample_size):\n",
    "    ind=sample_list[i]\n",
    "    f_noisy_val=f_noisy_val+f(x[ind],y[ind],opt_x)\n",
    "  f_noisy_val=(1.0/sample_size)*f_noisy_val\n",
    "  return f_noisy_val\n",
    "\n",
    "def first_oracle_epoch(f,grad_f,x,y,opt_x,subsample_num,start_ind):\n",
    "  grad_list=list(range(start_ind,start_ind+sample_size)) \n",
    "  grad_noisy=np.zeros(len(opt_x))\n",
    "  for ind in grad_list:\n",
    "    grad_noisy=grad_noisy+grad_f(x[ind],y[ind],opt_x)\n",
    "  grad_noisy=(1.0/subsample_num)*grad_noisy\n",
    "  return grad_noisy\n",
    "\n",
    "##################################################################################\n",
    "\n",
    "def estimate_epi_f(f,x,y,opt_x,zeroth_oracle,sample_size,n_trials,factor=1/5):\n",
    "  result_arr=np.zeros(n_trials)\n",
    "  for i in range(n_trials):\n",
    "    result_arr[i]=zeroth_oracle(f,x,y,opt_x,sample_size)\n",
    "    \n",
    "  return np.std(result_arr)*factor\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Algorithms"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "zHnqpIGatUNO"
   },
   "outputs": [],
   "source": [
    "#line search algorithm\n",
    "def line_search(f,grad_f,x,y,zeroth_oracle,first_oracle,sample_size,x_0,\n",
    "                eps_f,alpha_0,alpha_max,dec_gamma,inc_gamma,theta,epoch,\n",
    "                num_epochs_per_estimate=1, factor=1/5, x_test=None, y_test=None):\n",
    "  dim=len(x_0)\n",
    "  total_number=len(x)\n",
    "  x_current=x_0\n",
    "  x_previous=np.ones(dim)\n",
    "  iteration=epoch*(total_number/sample_size)\n",
    "  iteration=int(iteration)\n",
    "  fun_val_arr=np.zeros(iteration)\n",
    "  val_acc_arr = np.zeros(iteration)\n",
    "  alpha= alpha_0\n",
    "  iteration_count=0\n",
    "  prev_epoch_number = -1\n",
    "  while iteration_count<iteration:\n",
    "    # shuffle at the start of each epoch\n",
    "    epoch_number = int(np.floor(iteration_count * sample_size / total_number))\n",
    "    if epoch_number == int(prev_epoch_number + 1) and (epoch_number % num_epochs_per_estimate == 0):\n",
    "      prev_epoch_number += 1\n",
    "      x,y = shuffle(x, y, random_state=iteration_count)\n",
    "      eps_f = estimate_epi_f(f,x,y,x_current,zeroth_oracle,sample_size,n_trials=30,factor=factor)\n",
    "      \n",
    "    grad_approximation=first_oracle(f,grad_f,x,y,x_current,sample_size)\n",
    "    x_new=x_current-alpha*grad_approximation\n",
    "    f_current=zeroth_oracle(f,x,y,x_current,sample_size)\n",
    "    f_new=zeroth_oracle(f,x,y,x_new,sample_size)\n",
    "    if f_new<=f_current-alpha*theta*LA.norm(grad_approximation)**2+2*eps_f:\n",
    "      x_previous=x_current\n",
    "      x_current=x_new\n",
    "      alpha=min(inc_gamma*alpha,alpha_max)  \n",
    "      function_true_new = true_function_val(f,x,y,x_current)\n",
    "      validation_accuracy = validate(f,x_test,y_test,x_current)\n",
    "      val_acc_arr[iteration_count] = validation_accuracy\n",
    "      fun_val_arr[iteration_count]= function_true_new ##or the true value?\n",
    "    else:\n",
    "      alpha=dec_gamma*alpha\n",
    "      function_true_current= true_function_val(f,x,y,x_current)\n",
    "      fun_val_arr[iteration_count]= function_true_current\n",
    "      validation_accuracy = validate(f,x_test,y_test,x_current)\n",
    "      val_acc_arr[iteration_count] = validation_accuracy\n",
    "    iteration_count=iteration_count+1\n",
    "\n",
    "  return x_current,fun_val_arr,val_acc_arr"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#line search algorithm using the same mini-batch for the zeroth and first order oracles\n",
    "def line_search_same_batch(f,grad_f,x,y,zeroth_oracle,first_oracle,sample_size,x_0,\n",
    "                      eps_f,alpha_0,alpha_max,dec_gamma,inc_gamma,theta,epoch,\n",
    "                      num_epochs_per_estimate=1, factor=1/5, epi_f_zeroth_oracle=zeroth_oracle, x_test=None, y_test=None):\n",
    "  \"\"\"Stochastic line search in which one contiguous mini-batch feeds both oracles.\n",
    "\n",
    "  zeroth_oracle and first_oracle are called with an extra start_ind argument and\n",
    "  are evaluated on the same batch in each iteration; start_ind then advances by\n",
    "  sample_size modulo len(x). The data are reshuffled whenever start_ind is 0.\n",
    "  A step is accepted when the batch loss drops by at least\n",
    "  alpha*theta*||gradient||^2 - 2*eps_f; alpha grows by inc_gamma (capped at\n",
    "  alpha_max) after an accepted step and shrinks by dec_gamma otherwise.\n",
    "\n",
    "  eps_f is re-estimated with estimate_epi_f and epi_f_zeroth_oracle every\n",
    "  num_epochs_per_estimate epochs. The first estimate happens at iteration 0, so\n",
    "  the eps_f argument is replaced before it is used. The default of\n",
    "  epi_f_zeroth_oracle is bound to the module-level zeroth_oracle when this def\n",
    "  runs, not to the zeroth_oracle argument.\n",
    "\n",
    "  Returns (x_current, fun_val_arr, val_acc_arr); val_acc_arr is NaN when x_test\n",
    "  or y_test is not supplied.\n",
    "  \"\"\"\n",
    "  total_number=len(x)\n",
    "  x_current=x_0\n",
    "  iteration=int(epoch*(total_number/sample_size))\n",
    "  fun_val_arr=np.zeros(iteration)\n",
    "  has_test_set=x_test is not None and y_test is not None\n",
    "  val_acc_arr=np.zeros(iteration) if has_test_set else np.full(iteration,np.nan)\n",
    "  alpha=alpha_0\n",
    "  start_ind=0\n",
    "  prev_epoch_number=-1\n",
    "  for iteration_count in range(iteration):\n",
    "    # shuffle at the start of each pass over the data\n",
    "    if start_ind == 0:\n",
    "      x,y = shuffle(x, y, random_state=iteration_count)\n",
    "\n",
    "    epoch_number=int(np.floor(iteration_count*sample_size/total_number))\n",
    "    if epoch_number != prev_epoch_number:\n",
    "      # track every epoch change so the modulo schedule below keeps firing\n",
    "      prev_epoch_number=epoch_number\n",
    "      if epoch_number % num_epochs_per_estimate == 0:\n",
    "        eps_f = estimate_epi_f(f,x,y,x_current,epi_f_zeroth_oracle,sample_size,n_trials=30,factor=factor)\n",
    "\n",
    "    grad_approximation=first_oracle(f,grad_f,x,y,x_current,sample_size,start_ind)\n",
    "    x_new=x_current-alpha*grad_approximation\n",
    "    f_current=zeroth_oracle(f,x,y,x_current,sample_size,start_ind)\n",
    "    f_new=zeroth_oracle(f,x,y,x_new,sample_size,start_ind)\n",
    "    start_ind=(start_ind+sample_size)%total_number\n",
    "    if f_new<=f_current-alpha*theta*LA.norm(grad_approximation)**2+2*eps_f:\n",
    "      # successful step: move and try a larger step next time\n",
    "      x_current=x_new\n",
    "      alpha=min(inc_gamma*alpha,alpha_max)\n",
    "    else:\n",
    "      # unsuccessful step: stay put and shrink the step\n",
    "      alpha=dec_gamma*alpha\n",
    "\n",
    "    # bookkeeping is the same for both outcomes: evaluate at the current iterate\n",
    "    fun_val_arr[iteration_count]=true_function_val(f,x,y,x_current)\n",
    "    if has_test_set:\n",
    "      val_acc_arr[iteration_count]=validate(f,x_test,y_test,x_current)\n",
    "\n",
    "  return x_current,fun_val_arr,val_acc_arr"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# the SLS algorithm:\n",
    "def reset_opt_2(eta,eta_max,gamma,sample_size,iteration_count,total_number):\n",
    "  if iteration_count == 0:\n",
    "    return eta_max\n",
    "  else:\n",
    "    eta = eta*gamma**(sample_size/total_number)\n",
    "    return min(eta,10)\n",
    "\n",
    "def sls(f, grad_f, x, y, zeroth_oracle,first_oracle,x_0, c, beta, gamma, eta_max, sample_size, epoch, x_test, y_test):\n",
    "    dim = len(x_0)\n",
    "    total_number = len(x)\n",
    "    iteration = int(epoch*(total_number/sample_size))\n",
    "    fun_val_arr = np.zeros(iteration)\n",
    "    val_acc_arr = np.zeros(iteration)\n",
    "    \n",
    "    iteration_count = 0\n",
    "    x_current=x_0\n",
    "    x_previous=np.ones(dim)\n",
    "    eta = eta_max\n",
    "    iteration_count=0\n",
    "    start_ind=0\n",
    "    count_f_eval_extra=0\n",
    "    \n",
    "    while iteration_count < iteration:\n",
    "        # shuffle at the start of each epoch\n",
    "        if start_ind == 0:\n",
    "            x,y = shuffle(x, y, random_state=iteration_count)\n",
    "        \n",
    "        grad_approximation=first_oracle(f,grad_f,x,y,x_current,sample_size,start_ind)\n",
    "        f_current=zeroth_oracle(f,x,y,x_current,sample_size,start_ind)\n",
    "        eta=reset_opt_2(eta,eta_max,gamma,sample_size,iteration_count,total_number)\n",
    "        x_cand=x_current-eta*grad_approximation\n",
    "        f_cand=zeroth_oracle(f,x,y,x_cand,sample_size,start_ind)\n",
    "        if f_cand <= f_current-c*eta*LA.norm(grad_approximation)**2:\n",
    "          start_ind=(start_ind+sample_size)% total_number\n",
    "          x_previous=x_current\n",
    "          x_current=x_cand\n",
    "          function_true_new = true_function_val(f,x,y,x_current)\n",
    "          fun_val_arr[iteration_count]= function_true_new \n",
    "          validation_accuracy = validate(f,x_test,y_test,x_current)\n",
    "          val_acc_arr[iteration_count] = validation_accuracy\n",
    "          iteration_count=iteration_count+1\n",
    "        else:\n",
    "          fun_val_arr[iteration_count]= true_function_val(f,x,y,x_current)\n",
    "          val_acc_arr[iteration_count] = validate(f,x_test,y_test,x_current)\n",
    "          iteration_count=iteration_count+1\n",
    "          while iteration_count < iteration and f_cand > f_current-c*eta*LA.norm(grad_approximation)**2:\n",
    "              eta=beta*eta\n",
    "              x_cand=x_current-eta*grad_approximation\n",
    "              f_cand=zeroth_oracle(f,x,y,x_cand,sample_size,start_ind)\n",
    "              fun_val_arr[iteration_count]= true_function_val(f,x,y,x_current) \n",
    "              val_acc_arr[iteration_count] = validate(f,x_test,y_test,x_current)\n",
    "              count_f_eval_extra=count_f_eval_extra + 1\n",
    "              iteration_count=iteration_count+1\n",
    "          if f_cand <= f_current-c*eta*LA.norm(grad_approximation)**2:\n",
    "            start_ind=(start_ind+sample_size)% total_number\n",
    "            x_previous=x_current\n",
    "            x_current=x_cand\n",
    "            fun_val_arr[iteration_count-1]= true_function_val(f,x,y,x_current)\n",
    "            val_acc_arr[iteration_count-1] = validate(f,x_test,y_test,x_current)\n",
    "          \n",
    "    return x_current,fun_val_arr, count_f_eval_extra, val_acc_arr\n",
    "    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#line search algorithm with full gradient\n",
    "def line_search_full_grad(f,grad_f,x,y,first_oracle,x_0,alpha_0,alpha_max,dec_gamma,inc_gamma,theta,epoch,x_test,y_test):\n",
    "    dim=len(x_0)\n",
    "    total_number=len(x)\n",
    "    x_current=x_0\n",
    "    x_previous=np.ones(dim)\n",
    "    iteration=epoch\n",
    "    fun_val_arr=np.zeros(iteration)\n",
    "    val_acc_arr = np.zeros(iteration)\n",
    "    alpha= alpha_0\n",
    "    iteration_count=0\n",
    "    while iteration_count<iteration:      \n",
    "        grad_approximation=first_oracle(f,grad_f,x,y,x_current,total_number)\n",
    "        x_new=x_current-alpha*grad_approximation\n",
    "        f_current= true_function_val(f,x,y,x_current)\n",
    "        f_new= true_function_val(f,x,y,x_new)\n",
    "        if f_new<=f_current-alpha*theta*LA.norm(grad_approximation)**2:\n",
    "            x_previous=x_current\n",
    "            x_current=x_new\n",
    "            alpha=min(inc_gamma*alpha,alpha_max)  \n",
    "            fun_val_arr[iteration_count]= f_new \n",
    "            val_acc_arr[iteration_count] = validate(f,x_test,y_test,x_current)\n",
    "        else:\n",
    "            alpha=dec_gamma*alpha\n",
    "            fun_val_arr[iteration_count]= f_current\n",
    "            val_acc_arr[iteration_count] = validate(f,x_test,y_test,x_current)\n",
    "        iteration_count=iteration_count+1\n",
    "    return x_current,fun_val_arr,val_acc_arr"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Dataset Tools"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# implementing the rbf kernel: only need to change the input X!\n",
    "\n",
    "def rbf_kernel(A, B, sigma):\n",
    "    \"\"\"RBF kernel matrix exp(-||a-b||^2 / (2*sigma^2)) between the rows of A and the rows of B.\"\"\"\n",
    "    pairwise = metrics.pairwise.pairwise_distances(A, B, metric=\"euclidean\")\n",
    "    sq_dist = np.square(pairwise)\n",
    "    return np.exp(-1 * sq_dist/(2*sigma**2))\n",
    "\n",
    "def get_dataset(dataset_name, sigma,sample_size):\n",
    "    \"\"\"Load a PMLB data set and return RBF-kernel features for training and testing.\n",
    "\n",
    "    The features are standardised and the first 80% of the rows become the training\n",
    "    set (no shuffling in the split). The training set is then padded with randomly\n",
    "    chosen duplicates of its own rows until its size is a multiple of sample_size.\n",
    "\n",
    "    Returns (k_train_X, Y_train, k_test_X, Y_test), where k_train_X is the kernel of\n",
    "    the padded training rows against themselves and k_test_X is the kernel of the\n",
    "    test rows against the padded training rows.\n",
    "    \"\"\"\n",
    "    data = fetch_data(dataset_name)\n",
    "    # Work with a plain array: a pandas Series keeps its index labels through\n",
    "    # shuffle(), so Y_train[ind] would look up a label while X_train[ind] is positional.\n",
    "    y = np.asarray(data['target'])\n",
    "    X = data.drop('target',axis = 1)\n",
    "    x = preprocessing.scale(X)\n",
    "    X_train, X_test, Y_train, Y_test = train_test_split(x,y, test_size = 0.2, shuffle=False, random_state=0)\n",
    "    len_train=len(X_train)\n",
    "\n",
    "    X_train, Y_train = shuffle(X_train, Y_train, random_state=0)\n",
    "    total_num = np.ceil(len_train/sample_size)*sample_size\n",
    "    int_add_num=int(total_num-len_train)\n",
    "    # duplicates are distinct rows when possible; sampling with replacement is only\n",
    "    # needed when more padding rows are required than training rows exist\n",
    "    sampled_list=np.random.choice(range(0,len_train),int_add_num,replace=int_add_num>len_train)\n",
    "    X_add=X_train[sampled_list]\n",
    "    Y_add=Y_train[sampled_list].astype(float)\n",
    "    X_train=np.concatenate((X_train,X_add))\n",
    "    Y_train=np.concatenate((Y_train,Y_add))\n",
    "    X_train, Y_train = shuffle(X_train, Y_train, random_state=1)\n",
    "\n",
    "    k_train_X = rbf_kernel(X_train, X_train, sigma)\n",
    "    k_test_X = rbf_kernel(X_test, X_train, sigma)\n",
    "\n",
    "    return k_train_X, Y_train, k_test_X ,Y_test\n",
    "    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#All binary classification problems in PMLB with data points from 150-5000 \n",
    "\n",
    "dataset_names_new = ['analcatdata_lawsuit',\n",
    "'australian',\n",
    "'backache',\n",
    "'biomed',\n",
    "'breast_cancer_wisconsin',\n",
    "'breast_cancer',\n",
    "'breast_w',\n",
    "'breast',\n",
    "'buggyCrx',\n",
    "'bupa',\n",
    "'chess',\n",
    "'churn',\n",
    "'clean1',\n",
    "'cleve',\n",
    "'colic',\n",
    "'corral',\n",
    "'credit_a',\n",
    "'credit_g',\n",
    "'crx',\n",
    "'diabetes',\n",
    "'dis',\n",
    "'flare',\n",
    "'GAMETES_Epistasis_2_Way_1000atts_0.4H_EDM_1_EDM_1_1',\n",
    "'GAMETES_Epistasis_2_Way_20atts_0.1H_EDM_1_1',\n",
    "'GAMETES_Epistasis_2_Way_20atts_0.4H_EDM_1_1',\n",
    "'GAMETES_Epistasis_3_Way_20atts_0.2H_EDM_1_1',\n",
    "'GAMETES_Heterogeneity_20atts_1600_Het_0.4_0.2_50_EDM_2_001',\n",
    "'GAMETES_Heterogeneity_20atts_1600_Het_0.4_0.2_75_EDM_2_001',\n",
    "'german',\n",
    "'glass2',\n",
    "'haberman',\n",
    "'heart_c',\n",
    "'heart_h',\n",
    "'heart_statlog',\n",
    "'hepatitis',\n",
    "'Hill_Valley_with_noise',\n",
    "'Hill_Valley_without_noise',\n",
    "'horse_colic',\n",
    "'house_votes_84',\n",
    "'hungarian',\n",
    "'hypothyroid',\n",
    "'ionosphere',\n",
    "'irish',\n",
    "'kr_vs_kp',\n",
    "'mofn_3_7_10',\n",
    "'monk1',\n",
    "'monk2',\n",
    "'monk3',\n",
    "'parity5+5',\n",
    "'pima',\n",
    "'prnn_crabs',\n",
    "'prnn_synth',\n",
    "'profb',\n",
    "'saheart',\n",
    "'sonar',\n",
    "'spambase',\n",
    "'spect',\n",
    "'spectf',\n",
    "'threeOf9',\n",
    "'tic_tac_toe',\n",
    "'tokyo1',\n",
    "'vote',\n",
    "'wdbc',\n",
    "'xd6']"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Binary classification experiments"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "np.random.seed(1)\n",
    "\n",
    "#################### Real Datasets: non-separable #######################\n",
    "sample_size= 128 # try different values\n",
    "data_sets = dataset_names_new\n",
    "number_datasets=len(data_sets)\n",
    "\n",
    "# best training loss reached by each algorithm, averaged over trials, per data set\n",
    "perform_our_algo_same_batch= np.zeros(number_datasets)\n",
    "perform_sls= np.zeros(number_datasets)\n",
    "perform_full_grad= np.zeros(number_datasets)\n",
    "dataset_ind=0\n",
    "\n",
    "def _plot_mean_band(ax, xs, means, stds, label, color, clip_range=None):\n",
    "    # Draw a mean curve with a +/- one standard deviation band.\n",
    "    # clip_range=(low, high) keeps the band inside a valid range (used for accuracies).\n",
    "    lower, upper = means-stds, means+stds\n",
    "    if clip_range is not None:\n",
    "        lower = np.maximum(lower, clip_range[0])\n",
    "        upper = np.minimum(upper, clip_range[1])\n",
    "    ax.plot(xs, means, label=label, c=color)\n",
    "    ax.fill_between(xs, lower, upper, alpha=0.3, facecolor=color)\n",
    "\n",
    "for dataset in data_sets:\n",
    "    sigma_para=1\n",
    "    X_train,Y_train,X_test,Y_test=get_dataset(dataset, sigma_para, sample_size)\n",
    "\n",
    "    #run the algorithm with multiple initial points and check performance\n",
    "\n",
    "    n = len(X_train)\n",
    "    # get_dataset pads the training set to a whole number of batches\n",
    "    num_of_batch = n//sample_size\n",
    "    print(\"DATASET: \", dataset)\n",
    "    print(\"number of batches of %d samples: \" % sample_size, num_of_batch)\n",
    "    dim=len(X_train[0])\n",
    "\n",
    "    trials=5\n",
    "    epoch=100\n",
    "    iteration=int(epoch*num_of_batch)\n",
    "    fun_value_matrix1=np.zeros([trials,iteration])\n",
    "    fun_value_matrix_2=np.zeros([trials,iteration])\n",
    "    fun_value_matrix_full=np.zeros([trials,epoch])\n",
    "    val_acc_matrix1=np.zeros([trials,iteration])\n",
    "    val_acc_matrix_2=np.zeros([trials,iteration])\n",
    "    val_acc_matrix_full=np.zeros([trials,epoch])\n",
    "    count_f_eval_extra_arr=np.zeros(trials)\n",
    "\n",
    "    for k in range(0,trials):\n",
    "        #parameters for line search\n",
    "        X_train,Y_train= shuffle(X_train, Y_train, random_state=k)\n",
    "        x_0=np.random.normal(0, 1, dim)\n",
    "        dec_gamma=0.8\n",
    "        inc_gamma=1.25\n",
    "        theta=0.2\n",
    "        alpha_max=10\n",
    "        alpha_0=1\n",
    "        n_trials=30\n",
    "        ### use the standard deviation of noisy function values at x_0 as epi_f ###\n",
    "        eps_f = estimate_epi_f(f,X_train,Y_train,x_0,zeroth_oracle,sample_size,n_trials) # try different values\n",
    "\n",
    "        # adaptive line search, same mini-batch for both oracles\n",
    "        x_output1,fun_value_arr1,val_acc_arr1=line_search_same_batch(f,grad_f,X_train,Y_train,zeroth_oracle_epoch,\n",
    "                                                      first_oracle_epoch,sample_size,x_0,\n",
    "                                                      eps_f,alpha_0,alpha_max,dec_gamma,inc_gamma,\n",
    "                                                      theta,epoch,factor=1/5,epi_f_zeroth_oracle=zeroth_oracle,\n",
    "                                                      x_test=X_test,y_test=Y_test)\n",
    "        fun_value_matrix1[k,:] = fun_value_arr1\n",
    "        val_acc_matrix1[k,:] = val_acc_arr1\n",
    "\n",
    "        # full-gradient line search baseline (one iteration per epoch)\n",
    "        x_output_full,fun_val_arr_full,val_acc_arr_full = line_search_full_grad(f,grad_f,X_train,Y_train,first_oracle,x_0,alpha_0,\n",
    "                                                             alpha_max,dec_gamma,inc_gamma,theta,epoch,X_test,Y_test)\n",
    "        fun_value_matrix_full[k,:] = fun_val_arr_full\n",
    "        val_acc_matrix_full[k,:] = val_acc_arr_full\n",
    "\n",
    "        # SLS baseline\n",
    "        c=0.1\n",
    "        beta=0.9\n",
    "        gamma=1.5\n",
    "        eta_max=1\n",
    "        x_output_2, fun_value_arr_2, count_f_eval_extra, val_acc_arr_2 = sls(f, grad_f,X_train,Y_train,\n",
    "                                                                           zeroth_oracle_epoch,first_oracle_epoch,x_0,\n",
    "                                                                           c, beta, gamma, eta_max, sample_size, epoch,\n",
    "                                                                           X_test, Y_test)\n",
    "        fun_value_matrix_2[k,:] = fun_value_arr_2\n",
    "        val_acc_matrix_2[k,:] = val_acc_arr_2\n",
    "        count_f_eval_extra_arr[k] = count_f_eval_extra\n",
    "\n",
    "    fun_value_means1 = fun_value_matrix1.mean(0)\n",
    "    val_acc_means1 = val_acc_matrix1.mean(0)\n",
    "    fun_value_means_2 = fun_value_matrix_2.mean(0)\n",
    "    val_acc_means_2 = val_acc_matrix_2.mean(0)\n",
    "    fun_value_means_full = fun_value_matrix_full.mean(0)\n",
    "    val_acc_means_full = val_acc_matrix_full.mean(0)\n",
    "\n",
    "    perform_our_algo_same_batch[dataset_ind] = np.mean(fun_value_matrix1.min(axis=1))\n",
    "    perform_sls[dataset_ind] = np.mean(fun_value_matrix_2.min(axis=1))\n",
    "    perform_full_grad[dataset_ind] = np.mean(fun_value_matrix_full.min(axis=1))\n",
    "    dataset_ind = dataset_ind+1\n",
    "\n",
    "    fun_value_std1 = np.std(fun_value_matrix1, axis=0)\n",
    "    val_acc_std1 = np.std(val_acc_matrix1, axis=0)\n",
    "    fun_value_std_2 = np.std(fun_value_matrix_2, axis=0)\n",
    "    val_acc_std_2 = np.std(val_acc_matrix_2, axis=0)\n",
    "    fun_value_std_full = np.std(fun_value_matrix_full, axis=0)\n",
    "    val_acc_std_full = np.std(val_acc_matrix_full, axis=0)\n",
    "\n",
    "    # plot the mean with standard deviation:\n",
    "    iteration_num = list(range(iteration))\n",
    "    # the full-gradient method logs one value per epoch; place it on the mini-batch iteration axis\n",
    "    epoch_itr_num = num_of_batch*np.arange(epoch)\n",
    "    clrs = sns.color_palette(\"tab10\", 10)\n",
    "    fontsize=19\n",
    "\n",
    "    fig, ax = plt.subplots()\n",
    "    _plot_mean_band(ax, iteration_num, fun_value_means1, fun_value_std1, \"Adaptive line search\", clrs[1])\n",
    "    _plot_mean_band(ax, iteration_num, fun_value_means_2, fun_value_std_2, \"SLS algorithm\", clrs[2])\n",
    "    _plot_mean_band(ax, epoch_itr_num, fun_value_means_full, fun_value_std_full, \"Full gradient line search\", clrs[3])\n",
    "    ax.set_title('Training loss %s'%dataset, fontsize=fontsize)\n",
    "    ax.set_xlabel('iteration', fontsize=fontsize)\n",
    "    ax.legend(fontsize=fontsize)\n",
    "    ax.set_yscale('log')\n",
    "    # save before showing so the file is written from the fully drawn figure\n",
    "    fig.savefig(dataset+'_full_compare_train.png', dpi=300)\n",
    "    plt.show()\n",
    "    plt.close(fig)\n",
    "\n",
    "    # Plot validation accuracy\n",
    "    fig, ax = plt.subplots()\n",
    "    _plot_mean_band(ax, iteration_num, val_acc_means1, val_acc_std1, \"Adaptive line search\", clrs[1], clip_range=(0.0, 1.0))\n",
    "    _plot_mean_band(ax, iteration_num, val_acc_means_2, val_acc_std_2, \"SLS algorithm\", clrs[2], clip_range=(0.0, 1.0))\n",
    "    _plot_mean_band(ax, epoch_itr_num, val_acc_means_full, val_acc_std_full, \"Full gradient line search\", clrs[3], clip_range=(0.0, 1.0))\n",
    "    ax.set_title('Validation accuracy %s'%dataset, fontsize=fontsize)\n",
    "    ax.set_xlabel('iteration', fontsize=fontsize)\n",
    "    ax.legend(fontsize=fontsize)\n",
    "    fig.savefig(dataset+'_full_compare_test.png', dpi=300)\n",
    "    plt.show()\n",
    "    plt.close(fig)\n",
    "\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Parallel Experiments"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%time\n",
    "import ray\n",
    "import time\n",
    "\n",
    "# Start Ray.\n",
    "ray.shutdown()\n",
    "ray.init()\n",
    "\n",
    "sample_size= 128 # try different values\n",
    "data_sets = dataset_names_new\n",
    "number_datasets=len(data_sets)\n",
    "\n",
    "perform_our_algo_same_batch= np.zeros(number_datasets)\n",
    "perform_sls= np.zeros(number_datasets)\n",
    "perform_full_grad= np.zeros(number_datasets)\n",
    "\n",
    "trials=5\n",
    "epoch=100\n",
    "\n",
    "@ray.remote\n",
    "def parallel_wrapper(dataset):\n",
    "    \"\"\"Run all trials of the three algorithms on one data set inside a Ray task.\n",
    "\n",
    "    Reads sample_size, trials and epoch from the enclosing notebook scope.\n",
    "    Returns the per-trial loss and validation-accuracy matrices of the adaptive\n",
    "    line search, SLS and the full-gradient line search, followed by the per-trial\n",
    "    count of extra SLS function evaluations.\n",
    "    \"\"\"\n",
    "    np.random.seed(1)\n",
    "\n",
    "    #################### Real Datasets: non-separable #######################\n",
    "\n",
    "    sigma_para=1\n",
    "    X_train,Y_train,X_test,Y_test=get_dataset(dataset, sigma_para, sample_size)\n",
    "\n",
    "    #run the algorithm with multiple initial points and check performance\n",
    "\n",
    "    n = len(X_train)\n",
    "    num_of_batch =n/sample_size\n",
    "    print(\"DATASET: \", dataset)\n",
    "    dim=len(X_train[0])\n",
    "\n",
    "    iteration=int(epoch*num_of_batch)\n",
    "    fun_value_matrix1=np.zeros([trials,iteration])\n",
    "    fun_value_matrix_2=np.zeros([trials,iteration])\n",
    "    fun_value_matrix_full=np.zeros([trials,epoch])\n",
    "    val_acc_matrix1=np.zeros([trials,iteration])\n",
    "    val_acc_matrix_2=np.zeros([trials,iteration])\n",
    "    val_acc_matrix_full=np.zeros([trials,epoch])\n",
    "    count_f_eval_extra_arr=np.zeros(trials)\n",
    "\n",
    "    #parameters for line search\n",
    "    dec_gamma=0.8\n",
    "    inc_gamma=1.25\n",
    "    theta=0.2\n",
    "    alpha_max=10\n",
    "    alpha_0=1\n",
    "    n_trials=30\n",
    "    #parameters for SLS\n",
    "    c=0.1\n",
    "    beta=0.9\n",
    "    gamma=1.5\n",
    "    eta_max=1\n",
    "\n",
    "    for k in range(0,trials):\n",
    "        X_train,Y_train= shuffle(X_train, Y_train, random_state=k)\n",
    "        x_0=np.random.normal(0, 1, dim)\n",
    "        ### use the standard deviation of noisy function values at x_0 as epi_f ###\n",
    "        eps_f = estimate_epi_f(f,X_train,Y_train,x_0,zeroth_oracle,sample_size,n_trials) # try different values\n",
    "\n",
    "        x_output1,fun_value_arr1,val_acc_arr1=line_search_same_batch(f,grad_f,X_train,Y_train,zeroth_oracle_epoch,\n",
    "                                                      first_oracle_epoch,sample_size,x_0,\n",
    "                                                      eps_f,alpha_0,alpha_max,dec_gamma,inc_gamma,\n",
    "                                                      theta,epoch,factor=1/5,epi_f_zeroth_oracle=zeroth_oracle,\n",
    "                                                      x_test=X_test,y_test=Y_test)\n",
    "        fun_value_matrix1[k,:] = fun_value_arr1\n",
    "        val_acc_matrix1[k,:] = val_acc_arr1\n",
    "\n",
    "        x_output_full,fun_val_arr_full,val_acc_arr_full = line_search_full_grad(f,grad_f,X_train,Y_train,first_oracle,x_0,alpha_0,\n",
    "                                                             alpha_max,dec_gamma,inc_gamma,theta,epoch,X_test,Y_test)\n",
    "        fun_value_matrix_full[k,:] = fun_val_arr_full\n",
    "        val_acc_matrix_full[k,:] = val_acc_arr_full\n",
    "\n",
    "        x_output_2, fun_value_arr_2, count_f_eval_extra, val_acc_arr_2 = sls(f, grad_f,X_train,Y_train,\n",
    "                                                                           zeroth_oracle_epoch,first_oracle_epoch,x_0,\n",
    "                                                                           c, beta, gamma, eta_max, sample_size, epoch,\n",
    "                                                                           X_test, Y_test)\n",
    "        fun_value_matrix_2[k,:] = fun_value_arr_2\n",
    "        val_acc_matrix_2[k,:] = val_acc_arr_2\n",
    "        count_f_eval_extra_arr[k] = count_f_eval_extra\n",
    "    return fun_value_matrix1, fun_value_matrix_2, fun_value_matrix_full, val_acc_matrix1, val_acc_matrix_2, val_acc_matrix_full, count_f_eval_extra_arr\n",
    "\n",
    "# one task per data set; Ray is shut down even when a task raises\n",
    "try:\n",
    "    result_ids = [parallel_wrapper.remote(dataset) for dataset in data_sets]\n",
    "    results = ray.get(result_ids)\n",
    "finally:\n",
    "    ray.shutdown()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def _plot_mean_band(ax, x, mean, std, label, color, bounds=None):\n",
    "    \"\"\"Draw a mean curve on ``ax`` with a shaded mean +/- std band.\n",
    "\n",
    "    ``bounds`` is an optional ``(low, high)`` pair; when given, the band edges\n",
    "    are clipped to that interval before shading.\n",
    "    \"\"\"\n",
    "    lower, upper = mean - std, mean + std\n",
    "    if bounds is not None:\n",
    "        lower = np.maximum(lower, bounds[0])\n",
    "        upper = np.minimum(upper, bounds[1])\n",
    "    ax.plot(x, mean, label=label, c=color)\n",
    "    ax.fill_between(x, lower, upper, alpha=0.3, facecolor=color)\n",
    "\n",
    "\n",
    "def _finish_figure(fig, ax, title, filename, fontsize=19):\n",
    "    \"\"\"Add title, x-label and legend to ``ax``, then save, show and close ``fig``.\"\"\"\n",
    "    ax.set_title(title, fontsize=fontsize)\n",
    "    ax.set_xlabel('iteration', fontsize=fontsize)\n",
    "    ax.legend(fontsize=fontsize)\n",
    "    # Write the file before show() so saving never depends on what the backend\n",
    "    # does with the figure once it has been displayed.\n",
    "    fig.savefig(filename, dpi=300)\n",
    "    plt.show()\n",
    "    # Close this specific figure rather than whichever one happens to be current.\n",
    "    plt.close(fig)\n",
    "\n",
    "\n",
    "# The palette does not depend on the dataset, so build it once.\n",
    "clrs = sns.color_palette(\"tab10\", 10)\n",
    "acc_bounds = (0.0, 1.0)  # accuracy bands are clipped to the valid range\n",
    "\n",
    "for i, dataset in enumerate(data_sets):\n",
    "    # results[i] is the 7-tuple returned by parallel_wrapper for data_sets[i].\n",
    "    (fun_value_matrix1, fun_value_matrix_2, fun_value_matrix_full,\n",
    "     val_acc_matrix1, val_acc_matrix_2, val_acc_matrix_full,\n",
    "     count_f_eval_extra_arr) = results[i]\n",
    "\n",
    "    # Mean and standard deviation over axis 0, i.e. across the rows of each matrix.\n",
    "    fun_value_means1 = fun_value_matrix1.mean(0)\n",
    "    fun_value_std1 = np.std(fun_value_matrix1, axis=0)\n",
    "    val_acc_means1 = val_acc_matrix1.mean(0)\n",
    "    val_acc_std1 = np.std(val_acc_matrix1, axis=0)\n",
    "\n",
    "    fun_value_means_2 = fun_value_matrix_2.mean(0)\n",
    "    fun_value_std_2 = np.std(fun_value_matrix_2, axis=0)\n",
    "    val_acc_means_2 = val_acc_matrix_2.mean(0)\n",
    "    val_acc_std_2 = np.std(val_acc_matrix_2, axis=0)\n",
    "\n",
    "    fun_value_means_full = fun_value_matrix_full.mean(0)\n",
    "    fun_value_std_full = np.std(fun_value_matrix_full, axis=0)\n",
    "    val_acc_means_full = val_acc_matrix_full.mean(0)\n",
    "    val_acc_std_full = np.std(val_acc_matrix_full, axis=0)\n",
    "\n",
    "    # Summary score per method: the lowest loss in each row, averaged over rows.\n",
    "    perform_our_algo_same_batch[i] = np.mean(fun_value_matrix1.min(axis=1))\n",
    "    perform_sls[i] = np.mean(fun_value_matrix_2.min(axis=1))\n",
    "    perform_full_grad[i] = np.mean(fun_value_matrix_full.min(axis=1))\n",
    "\n",
    "    # The full-gradient curves have one entry per epoch. Take the epoch count\n",
    "    # from the data instead of a hard-coded constant so this cell keeps working\n",
    "    # when the experiment is re-run with a different number of epochs.\n",
    "    n_iterations = fun_value_means1.shape[0]\n",
    "    n_epochs = fun_value_means_full.shape[0]\n",
    "    iteration_num = np.arange(n_iterations)\n",
    "    num_of_batch = n_iterations // n_epochs\n",
    "    # Place the per-epoch points at multiples of num_of_batch so that all three\n",
    "    # methods share the same iteration axis.\n",
    "    epoch_itr_num = num_of_batch * np.arange(n_epochs)\n",
    "\n",
    "    # Training loss, mean with a one-standard-deviation band, on a log y-axis.\n",
    "    # NOTE(review): mean - std can be <= 0, which a log axis cannot represent;\n",
    "    # consider clipping the lower band edge to a small positive value.\n",
    "    fig, ax = plt.subplots()\n",
    "    _plot_mean_band(ax, iteration_num, fun_value_means1, fun_value_std1, \"ALOE \", clrs[1])\n",
    "    _plot_mean_band(ax, iteration_num, fun_value_means_2, fun_value_std_2, \"SLS \", clrs[2])\n",
    "    _plot_mean_band(ax, epoch_itr_num, fun_value_means_full, fun_value_std_full, \"Full gradient LS\", clrs[3])\n",
    "    ax.set_yscale('log')\n",
    "    _finish_figure(fig, ax, 'Training loss for %s'%dataset, dataset+'_full_compare_train.png')\n",
    "\n",
    "    # Test accuracy, with the band clipped to [0, 1].\n",
    "    fig, ax = plt.subplots()\n",
    "    _plot_mean_band(ax, iteration_num, val_acc_means1, val_acc_std1, \"ALOE \", clrs[1], bounds=acc_bounds)\n",
    "    _plot_mean_band(ax, iteration_num, val_acc_means_2, val_acc_std_2, \"SLS \", clrs[2], bounds=acc_bounds)\n",
    "    _plot_mean_band(ax, epoch_itr_num, val_acc_means_full, val_acc_std_full, \"Full gradient LS\", clrs[3], bounds=acc_bounds)\n",
    "    _finish_figure(fig, ax, 'Test accuracy of %s'%dataset, dataset+'_full_compare_test.png')\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Histograms of performance comparisons"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Per-dataset gap in the summary training loss (SLS minus ALOE); a positive\n",
    "# entry means ALOE reached the lower loss on that dataset.\n",
    "sls_minus_aloe = perform_sls - perform_our_algo_same_batch\n",
    "\n",
    "fig, ax = plt.subplots()\n",
    "ax.hist(sls_minus_aloe, bins='auto')\n",
    "fig.savefig('Histogram of perform_sls-perform_our_algo_same_batch.png')\n",
    "\n",
    "# A bare expression in the middle of a cell is evaluated but never displayed,\n",
    "# so print the mean explicitly; the per-dataset gaps stay the cell's output.\n",
    "print('mean of perform_sls-perform_our_algo_same_batch:', np.mean(sls_minus_aloe))\n",
    "sls_minus_aloe"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Per-dataset gap in the summary training loss: full-gradient line search minus ALOE.\n",
    "full_grad_minus_aloe = perform_full_grad - perform_our_algo_same_batch\n",
    "\n",
    "plt.hist(full_grad_minus_aloe, bins=12)\n",
    "plt.savefig('Histogram of perform_full_grad-perform_our_algo_same_batch.png')\n",
    "\n",
    "# Leave the gaps as the last expression so the notebook displays them.\n",
    "full_grad_minus_aloe"
   ]
  }
 ],
 "metadata": {
  "colab": {
   "collapsed_sections": [],
   "name": "Line Search Method.ipynb",
   "provenance": []
  },
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
