{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# *Trust Region Newton-CG - to find approximate solution for Logistic Regression*"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import sys\n",
    "\n",
    "path = os.getcwd()\n",
    "parent_path = os.path.abspath(os.path.join(path, os.pardir))\n",
    "sys.path.append(parent_path)\n",
    "\n",
    "import random\n",
    "\n",
    "import pprint as pp\n",
    "import numpy as np\n",
    "import time\n",
    "import os\n",
    "import shutil\n",
    "from numpy import genfromtxt\n",
    "import torch\n",
    "import torch.nn as nn\n",
    "from torch import optim\n",
    "import torch.nn.functional as F\n",
    "from torch.autograd import Variable\n",
    "\n",
    "torch.set_default_dtype(torch.float64)\n",
    "torch.set_num_threads(1) #cpu num\n",
    "\n",
    "import itertools\n",
    "import numpy.linalg  as lin\n",
    "\n",
    "import cProfile, pstats\n",
    "\n",
    "from collections import OrderedDict\n",
    "\n",
    "from Sparse_Init.sparseinit import *    \n",
    "from Sparse_Init.sparsedata import *\n",
    "from Sparse_Init.sparsemodule import *\n",
    "from sklearn.preprocessing import normalize\n",
    "from Sparse_Init.sparsetrcg import *\n",
    "\n",
    "device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')\n",
    "print (device)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Configuration"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "algo = 'trcg' # algorithm\n",
    "dname = 'ijcnn1' # dataset name\n",
    "func = 'Logi' # only for logistic regression\n",
    "StrongConvex = True # L2 regularization for logistic regression"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Load data - user need to download datasets from LIBSVM"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# specify data directory\n",
    "datafolder = '../data/'+dname+'/'\n",
    "# Specify directory to save log files - optional\n",
    "logfolder = '../Logs/'+dname+'/'+func+'/'+algo+'/'\n",
    "\n",
    "if not os.path.exists(logfolder):\n",
    "    os.makedirs(logfolder)\n",
    "    \n",
    "\n",
    "# dataset files - need to be downloaded from LIBSVM website\n",
    "if dname == 'covtype':\n",
    "    file = datafolder+'covtype.libsvm.binary.scale.bz2'\n",
    "    \n",
    "if dname == 'ijcnn1':\n",
    "    trfile = datafolder+'ijcnn1.bz2'\n",
    "    tefile = datafolder+'ijcnn1.t.bz2'\n",
    "    \n",
    "if dname == 'rcv1':\n",
    "    trfile = datafolder+'rcv1_train.binary.bz2'\n",
    "    tefile = datafolder+'rcv1_test.binary.bz2'\n",
    "    \n",
    "if dname == 'news20':\n",
    "    file = datafolder+'news20.binary.bz2'\n",
    "    \n",
    "if dname == 'real-sim':\n",
    "    file = datafolder+'real-sim.bz2'\n",
    "    \n",
    "    \n",
    "try:\n",
    "    data = SparseData(dname,device,file=file)\n",
    "    csr = data.read()\n",
    "    normalize(csr[0],copy=False)\n",
    "    data.load(_csr=csr)\n",
    "except:\n",
    "    data = SparseData(dname,device,trfile=trfile,tefile=tefile)\n",
    "    train_csr, test_csr = data.read()\n",
    "    normalize(train_csr[0],copy=False)\n",
    "    normalize(test_csr[0],copy=False)\n",
    "    data.load(_trainCSR=train_csr,_testCSR=test_csr)\n",
    "print(data)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# algorithm and experiment setup"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "SEED=[0,1,2,3,4,5,6,7,8,9] # 10 random seeds \n",
    "# note: it is not necessary to initialize w_0 with different random seeds,\n",
    "#       user will find that, regardess of the starting points, \n",
    "#       TRCG can converge to, almost surely, the same \\tilde w^*\n",
    "\n",
    "TotalEP = 1000 # total effective pass\n",
    "\n",
    "# penalty term for logistic regression\n",
    "lam = 1.0/data.trSize\n",
    "LAM = [lam/10.0,lam,lam*10.0]\n",
    "    \n",
    "# trust region initialization\n",
    "radius=0.1 # initial radius\n",
    "initial=0.1 # initial radius\n",
    "radius_max=10.0 # upper-bound of radius\n",
    "precondition=0 # preconditioning choice, set to 0"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "for seed,lam in itertools.product(SEED,LAM):\n",
    "    Fcost = 0.0\n",
    "    Gcost = 0.0\n",
    "    print('======\\nlam - %s | seed - %s\\n======'%(lam,seed))  \n",
    "    \n",
    "    run_status = logfolder+'RUN-lam-%s-seed-%s'%(lam,seed)\n",
    "    done_status = logfolder+'DONE-lam-%s-seed-%s'%(lam,seed)\n",
    "    savefile = logfolder+'trcg-lam-%s-seed-%s.tar'%(lam,seed)\n",
    "    \n",
    "    if os.path.exists(run_status) or os.path.exists(done_status) or os.path.exists(savefile):\n",
    "        print(done_status)\n",
    "        continue\n",
    "    else:\n",
    "        os.makedirs(run_status)\n",
    "    \n",
    "    # results\n",
    "    HIST=[]\n",
    "    STAT=[]\n",
    "    ALPHA=[]\n",
    "\n",
    "    TIME = time.time() # total run time\n",
    "    \n",
    "    np.random.seed(seed)\n",
    "    torch.manual_seed(seed)\n",
    "    \n",
    "    if StrongConvex:\n",
    "        model = ConvexModel(data.num_feature,data.num_label,lam=lam,StrongConvex=True).to(device)\n",
    "    else:\n",
    "        model = ConvexModel(data.num_feature,data.num_label).to(device)\n",
    "            \n",
    "    if len(data.in_te_not_tr)>0:\n",
    "        model.del_in_te_not_tr(data.in_te_not_tr)\n",
    "        \n",
    "    optimizer = SparseTRCG(data,model,device,radius,precondition,eval_BS=2000,\\\n",
    "                           radius_max=radius_max,radius_initial=initial)\n",
    "        \n",
    "    # initial stopping flag\n",
    "    converge=False\n",
    "    fatal=False\n",
    "    toosmall=False\n",
    "    epoch_time = time.time()\n",
    "    \n",
    "    for ep in range(TotalEP):\n",
    "        \n",
    "        timeT = time.time() - epoch_time\n",
    "        epoch_time = time.time()\n",
    "        \n",
    "        if converge or fatal or toosmall: \n",
    "            break\n",
    "        \n",
    "        # compute batch loss,grad,test\n",
    "        Loss, V = model.LossGrad(data,second_order=True)\n",
    "        Grad = np.sum([(gi.data**2).sum().item() for gi in V])\n",
    "        Test = model.ComputeAccuracy(data) \n",
    "    \n",
    "        HIST.append([ep,Loss,Grad,Test])\n",
    "        STAT.append([ep,timeT,Fcost,Gcost])\n",
    "        ALPHA.append([ep,optimizer.radius])\n",
    "        \n",
    "        if ep%50==0:\n",
    "            print('ep: %.2f, alpha: %.4f, loss: %.2e, Grad: %.2e, Test: %.4f, Time: %.2f'\\\n",
    "                  %(ep,optimizer.radius,Loss,Grad,Test,timeT))\n",
    "            \n",
    "        if np.isnan(Loss) or np.isnan(Grad) or np.isnan(Test):\n",
    "            fatal = True\n",
    "            print('ep: %.2f, alpha: %.4f, loss: %.2e, Grad: %.2e, Test: %.4f, Time: %.2f'\\\n",
    "                  %(ep,optimizer.radius,Loss,Grad,Test,timeT))\n",
    "        if Grad < 1e-19:\n",
    "            converge=True\n",
    "            print('ep: %.2f, alpha: %.4f, loss: %.2e, Grad: %.2e, Test: %.4f, Time: %.2f'\\\n",
    "                  %(ep,optimizer.radius,Loss,Grad,Test,timeT))\n",
    "            \n",
    "        if optimizer.radius<1e-10:\n",
    "            toosmall=True\n",
    "            print('ep: %.2f, alpha: %.4f, loss: %.2e, Grad: %.2e, Test: %.4f, Time: %.2f'\\\n",
    "                  %(ep,optimizer.radius,Loss,Grad,Test,timeT))\n",
    "            \n",
    "        d, rho, update, CGITER, cg_term, _, norm_d, norm_p1, numerator, denominator,\\\n",
    "        firstloop, secondloop, thirdloop, cgloop, f_cost, g_cost\\\n",
    "        = optimizer.step(Loss,V)\n",
    "        \n",
    "        Fcost+=f_cost\n",
    "        Gcost+=g_cost\n",
    "            \n",
    "        \n",
    "    TIME = time.time() - TIME # total running time per run\n",
    "    \n",
    "    RESULTS = OrderedDict()\n",
    "    \n",
    "    RESULTS = {\n",
    "        'parm': ['trcg',lam,seed],\n",
    "        'model': [w.data.cpu().numpy() for w in model.parameters()],\n",
    "        'hist': HIST,\n",
    "        'stat': STAT,\n",
    "        'alpha': ALPHA,\n",
    "        'time': TIME\n",
    "    }\n",
    "    \n",
    "    torch.save(RESULTS,savefile)\n",
    "    \n",
    "    # update running status\n",
    "    if os.path.exists(run_status):\n",
    "        os.rmdir(run_status)\n",
    "    if not os.path.exists(done_status):\n",
    "        os.mkdir(done_status)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "exit(0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 Anaconda",
   "language": "python",
   "name": "python3anaconda"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
