{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 34
    },
    "id": "LYt0yzkiUhRw",
    "outputId": "85d5bacc-1a7e-4612-aa53-1876f99ebef3"
   },
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "import torch\n",
    "import matplotlib.pyplot as plt\n",
    "from torch import nn, optim\n",
    "from torch.utils.data import DataLoader\n",
    "import torch.nn.functional as F\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.preprocessing import MinMaxScaler\n",
    "%matplotlib inline\n",
    "torch.manual_seed(2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 119,
     "resources": {
      "http://localhost:8080/nbextensions/google.colab/files.js": {
       "data": "Ly8gQ29weXJpZ2h0IDIwMTcgR29vZ2xlIExMQwovLwovLyBMaWNlbnNlZCB1bmRlciB0aGUgQXBhY2hlIExpY2Vuc2UsIFZlcnNpb24gMi4wICh0aGUgIkxpY2Vuc2UiKTsKLy8geW91IG1heSBub3QgdXNlIHRoaXMgZmlsZSBleGNlcHQgaW4gY29tcGxpYW5jZSB3aXRoIHRoZSBMaWNlbnNlLgovLyBZb3UgbWF5IG9idGFpbiBhIGNvcHkgb2YgdGhlIExpY2Vuc2UgYXQKLy8KLy8gICAgICBodHRwOi8vd3d3LmFwYWNoZS5vcmcvbGljZW5zZXMvTElDRU5TRS0yLjAKLy8KLy8gVW5sZXNzIHJlcXVpcmVkIGJ5IGFwcGxpY2FibGUgbGF3IG9yIGFncmVlZCB0byBpbiB3cml0aW5nLCBzb2Z0d2FyZQovLyBkaXN0cmlidXRlZCB1bmRlciB0aGUgTGljZW5zZSBpcyBkaXN0cmlidXRlZCBvbiBhbiAiQVMgSVMiIEJBU0lTLAovLyBXSVRIT1VUIFdBUlJBTlRJRVMgT1IgQ09ORElUSU9OUyBPRiBBTlkgS0lORCwgZWl0aGVyIGV4cHJlc3Mgb3IgaW1wbGllZC4KLy8gU2VlIHRoZSBMaWNlbnNlIGZvciB0aGUgc3BlY2lmaWMgbGFuZ3VhZ2UgZ292ZXJuaW5nIHBlcm1pc3Npb25zIGFuZAovLyBsaW1pdGF0aW9ucyB1bmRlciB0aGUgTGljZW5zZS4KCi8qKgogKiBAZmlsZW92ZXJ2aWV3IEhlbHBlcnMgZm9yIGdvb2dsZS5jb2xhYiBQeXRob24gbW9kdWxlLgogKi8KKGZ1bmN0aW9uKHNjb3BlKSB7CmZ1bmN0aW9uIHNwYW4odGV4dCwgc3R5bGVBdHRyaWJ1dGVzID0ge30pIHsKICBjb25zdCBlbGVtZW50ID0gZG9jdW1lbnQuY3JlYXRlRWxlbWVudCgnc3BhbicpOwogIGVsZW1lbnQudGV4dENvbnRlbnQgPSB0ZXh0OwogIGZvciAoY29uc3Qga2V5IG9mIE9iamVjdC5rZXlzKHN0eWxlQXR0cmlidXRlcykpIHsKICAgIGVsZW1lbnQuc3R5bGVba2V5XSA9IHN0eWxlQXR0cmlidXRlc1trZXldOwogIH0KICByZXR1cm4gZWxlbWVudDsKfQoKLy8gTWF4IG51bWJlciBvZiBieXRlcyB3aGljaCB3aWxsIGJlIHVwbG9hZGVkIGF0IGEgdGltZS4KY29uc3QgTUFYX1BBWUxPQURfU0laRSA9IDEwMCAqIDEwMjQ7CgpmdW5jdGlvbiBfdXBsb2FkRmlsZXMoaW5wdXRJZCwgb3V0cHV0SWQpIHsKICBjb25zdCBzdGVwcyA9IHVwbG9hZEZpbGVzU3RlcChpbnB1dElkLCBvdXRwdXRJZCk7CiAgY29uc3Qgb3V0cHV0RWxlbWVudCA9IGRvY3VtZW50LmdldEVsZW1lbnRCeUlkKG91dHB1dElkKTsKICAvLyBDYWNoZSBzdGVwcyBvbiB0aGUgb3V0cHV0RWxlbWVudCB0byBtYWtlIGl0IGF2YWlsYWJsZSBmb3IgdGhlIG5leHQgY2FsbAogIC8vIHRvIHVwbG9hZEZpbGVzQ29udGludWUgZnJvbSBQeXRob24uCiAgb3V0cHV0RWxlbWVudC5zdGVwcyA9IHN0ZXBzOwoKICByZXR1cm4gX3VwbG9hZEZpbGVzQ29udGludWUob3V0cHV0SWQpOwp9CgovLyBUaGlzIGlzIHJvdWdobHkgYW4gYXN5bmMgZ2VuZXJhdG9yIChub3Qgc3VwcG9ydGVkIGluIHRoZSBicm93c2VyIHlldCksCi8vIHdoZXJlIHRoZXJlIGFyZSBtdWx0aXBsZSBhc3luY2hyb25vdXMgc3RlcHMgYW5k
IHRoZSBQeXRob24gc2lkZSBpcyBnb2luZwovLyB0byBwb2xsIGZvciBjb21wbGV0aW9uIG9mIGVhY2ggc3RlcC4KLy8gVGhpcyB1c2VzIGEgUHJvbWlzZSB0byBibG9jayB0aGUgcHl0aG9uIHNpZGUgb24gY29tcGxldGlvbiBvZiBlYWNoIHN0ZXAsCi8vIHRoZW4gcGFzc2VzIHRoZSByZXN1bHQgb2YgdGhlIHByZXZpb3VzIHN0ZXAgYXMgdGhlIGlucHV0IHRvIHRoZSBuZXh0IHN0ZXAuCmZ1bmN0aW9uIF91cGxvYWRGaWxlc0NvbnRpbnVlKG91dHB1dElkKSB7CiAgY29uc3Qgb3V0cHV0RWxlbWVudCA9IGRvY3VtZW50LmdldEVsZW1lbnRCeUlkKG91dHB1dElkKTsKICBjb25zdCBzdGVwcyA9IG91dHB1dEVsZW1lbnQuc3RlcHM7CgogIGNvbnN0IG5leHQgPSBzdGVwcy5uZXh0KG91dHB1dEVsZW1lbnQubGFzdFByb21pc2VWYWx1ZSk7CiAgcmV0dXJuIFByb21pc2UucmVzb2x2ZShuZXh0LnZhbHVlLnByb21pc2UpLnRoZW4oKHZhbHVlKSA9PiB7CiAgICAvLyBDYWNoZSB0aGUgbGFzdCBwcm9taXNlIHZhbHVlIHRvIG1ha2UgaXQgYXZhaWxhYmxlIHRvIHRoZSBuZXh0CiAgICAvLyBzdGVwIG9mIHRoZSBnZW5lcmF0b3IuCiAgICBvdXRwdXRFbGVtZW50Lmxhc3RQcm9taXNlVmFsdWUgPSB2YWx1ZTsKICAgIHJldHVybiBuZXh0LnZhbHVlLnJlc3BvbnNlOwogIH0pOwp9CgovKioKICogR2VuZXJhdG9yIGZ1bmN0aW9uIHdoaWNoIGlzIGNhbGxlZCBiZXR3ZWVuIGVhY2ggYXN5bmMgc3RlcCBvZiB0aGUgdXBsb2FkCiAqIHByb2Nlc3MuCiAqIEBwYXJhbSB7c3RyaW5nfSBpbnB1dElkIEVsZW1lbnQgSUQgb2YgdGhlIGlucHV0IGZpbGUgcGlja2VyIGVsZW1lbnQuCiAqIEBwYXJhbSB7c3RyaW5nfSBvdXRwdXRJZCBFbGVtZW50IElEIG9mIHRoZSBvdXRwdXQgZGlzcGxheS4KICogQHJldHVybiB7IUl0ZXJhYmxlPCFPYmplY3Q+fSBJdGVyYWJsZSBvZiBuZXh0IHN0ZXBzLgogKi8KZnVuY3Rpb24qIHVwbG9hZEZpbGVzU3RlcChpbnB1dElkLCBvdXRwdXRJZCkgewogIGNvbnN0IGlucHV0RWxlbWVudCA9IGRvY3VtZW50LmdldEVsZW1lbnRCeUlkKGlucHV0SWQpOwogIGlucHV0RWxlbWVudC5kaXNhYmxlZCA9IGZhbHNlOwoKICBjb25zdCBvdXRwdXRFbGVtZW50ID0gZG9jdW1lbnQuZ2V0RWxlbWVudEJ5SWQob3V0cHV0SWQpOwogIG91dHB1dEVsZW1lbnQuaW5uZXJIVE1MID0gJyc7CgogIGNvbnN0IHBpY2tlZFByb21pc2UgPSBuZXcgUHJvbWlzZSgocmVzb2x2ZSkgPT4gewogICAgaW5wdXRFbGVtZW50LmFkZEV2ZW50TGlzdGVuZXIoJ2NoYW5nZScsIChlKSA9PiB7CiAgICAgIHJlc29sdmUoZS50YXJnZXQuZmlsZXMpOwogICAgfSk7CiAgfSk7CgogIGNvbnN0IGNhbmNlbCA9IGRvY3VtZW50LmNyZWF0ZUVsZW1lbnQoJ2J1dHRvbicpOwogIGlucHV0RWxlbWVudC5wYXJlbnRFbGVtZW50LmFwcGVuZENoaWxkKGNhbmNlbCk7CiAgY2FuY2VsLnRleHRDb250ZW50ID0gJ0NhbmNlbCB1cGxvYWQnOwogIGNvbnN0IGNhbmNlbFByb21pc2UgPSBuZXcg
UHJvbWlzZSgocmVzb2x2ZSkgPT4gewogICAgY2FuY2VsLm9uY2xpY2sgPSAoKSA9PiB7CiAgICAgIHJlc29sdmUobnVsbCk7CiAgICB9OwogIH0pOwoKICAvLyBXYWl0IGZvciB0aGUgdXNlciB0byBwaWNrIHRoZSBmaWxlcy4KICBjb25zdCBmaWxlcyA9IHlpZWxkIHsKICAgIHByb21pc2U6IFByb21pc2UucmFjZShbcGlja2VkUHJvbWlzZSwgY2FuY2VsUHJvbWlzZV0pLAogICAgcmVzcG9uc2U6IHsKICAgICAgYWN0aW9uOiAnc3RhcnRpbmcnLAogICAgfQogIH07CgogIGNhbmNlbC5yZW1vdmUoKTsKCiAgLy8gRGlzYWJsZSB0aGUgaW5wdXQgZWxlbWVudCBzaW5jZSBmdXJ0aGVyIHBpY2tzIGFyZSBub3QgYWxsb3dlZC4KICBpbnB1dEVsZW1lbnQuZGlzYWJsZWQgPSB0cnVlOwoKICBpZiAoIWZpbGVzKSB7CiAgICByZXR1cm4gewogICAgICByZXNwb25zZTogewogICAgICAgIGFjdGlvbjogJ2NvbXBsZXRlJywKICAgICAgfQogICAgfTsKICB9CgogIGZvciAoY29uc3QgZmlsZSBvZiBmaWxlcykgewogICAgY29uc3QgbGkgPSBkb2N1bWVudC5jcmVhdGVFbGVtZW50KCdsaScpOwogICAgbGkuYXBwZW5kKHNwYW4oZmlsZS5uYW1lLCB7Zm9udFdlaWdodDogJ2JvbGQnfSkpOwogICAgbGkuYXBwZW5kKHNwYW4oCiAgICAgICAgYCgke2ZpbGUudHlwZSB8fCAnbi9hJ30pIC0gJHtmaWxlLnNpemV9IGJ5dGVzLCBgICsKICAgICAgICBgbGFzdCBtb2RpZmllZDogJHsKICAgICAgICAgICAgZmlsZS5sYXN0TW9kaWZpZWREYXRlID8gZmlsZS5sYXN0TW9kaWZpZWREYXRlLnRvTG9jYWxlRGF0ZVN0cmluZygpIDoKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgJ24vYSd9IC0gYCkpOwogICAgY29uc3QgcGVyY2VudCA9IHNwYW4oJzAlIGRvbmUnKTsKICAgIGxpLmFwcGVuZENoaWxkKHBlcmNlbnQpOwoKICAgIG91dHB1dEVsZW1lbnQuYXBwZW5kQ2hpbGQobGkpOwoKICAgIGNvbnN0IGZpbGVEYXRhUHJvbWlzZSA9IG5ldyBQcm9taXNlKChyZXNvbHZlKSA9PiB7CiAgICAgIGNvbnN0IHJlYWRlciA9IG5ldyBGaWxlUmVhZGVyKCk7CiAgICAgIHJlYWRlci5vbmxvYWQgPSAoZSkgPT4gewogICAgICAgIHJlc29sdmUoZS50YXJnZXQucmVzdWx0KTsKICAgICAgfTsKICAgICAgcmVhZGVyLnJlYWRBc0FycmF5QnVmZmVyKGZpbGUpOwogICAgfSk7CiAgICAvLyBXYWl0IGZvciB0aGUgZGF0YSB0byBiZSByZWFkeS4KICAgIGxldCBmaWxlRGF0YSA9IHlpZWxkIHsKICAgICAgcHJvbWlzZTogZmlsZURhdGFQcm9taXNlLAogICAgICByZXNwb25zZTogewogICAgICAgIGFjdGlvbjogJ2NvbnRpbnVlJywKICAgICAgfQogICAgfTsKCiAgICAvLyBVc2UgYSBjaHVua2VkIHNlbmRpbmcgdG8gYXZvaWQgbWVzc2FnZSBzaXplIGxpbWl0cy4gU2VlIGIvNjIxMTU2NjAuCiAgICBsZXQgcG9zaXRpb24gPSAwOwogICAgd2hpbGUgKHBvc2l0aW9uIDwgZmlsZURhdGEuYnl0ZUxlbmd0aCkgewogICAgICBjb25zdCBsZW5ndGggPSBNYXRoLm1pbihmaWxl
RGF0YS5ieXRlTGVuZ3RoIC0gcG9zaXRpb24sIE1BWF9QQVlMT0FEX1NJWkUpOwogICAgICBjb25zdCBjaHVuayA9IG5ldyBVaW50OEFycmF5KGZpbGVEYXRhLCBwb3NpdGlvbiwgbGVuZ3RoKTsKICAgICAgcG9zaXRpb24gKz0gbGVuZ3RoOwoKICAgICAgY29uc3QgYmFzZTY0ID0gYnRvYShTdHJpbmcuZnJvbUNoYXJDb2RlLmFwcGx5KG51bGwsIGNodW5rKSk7CiAgICAgIHlpZWxkIHsKICAgICAgICByZXNwb25zZTogewogICAgICAgICAgYWN0aW9uOiAnYXBwZW5kJywKICAgICAgICAgIGZpbGU6IGZpbGUubmFtZSwKICAgICAgICAgIGRhdGE6IGJhc2U2NCwKICAgICAgICB9LAogICAgICB9OwogICAgICBwZXJjZW50LnRleHRDb250ZW50ID0KICAgICAgICAgIGAke01hdGgucm91bmQoKHBvc2l0aW9uIC8gZmlsZURhdGEuYnl0ZUxlbmd0aCkgKiAxMDApfSUgZG9uZWA7CiAgICB9CiAgfQoKICAvLyBBbGwgZG9uZS4KICB5aWVsZCB7CiAgICByZXNwb25zZTogewogICAgICBhY3Rpb246ICdjb21wbGV0ZScsCiAgICB9CiAgfTsKfQoKc2NvcGUuZ29vZ2xlID0gc2NvcGUuZ29vZ2xlIHx8IHt9OwpzY29wZS5nb29nbGUuY29sYWIgPSBzY29wZS5nb29nbGUuY29sYWIgfHwge307CnNjb3BlLmdvb2dsZS5jb2xhYi5fZmlsZXMgPSB7CiAgX3VwbG9hZEZpbGVzLAogIF91cGxvYWRGaWxlc0NvbnRpbnVlLAp9Owp9KShzZWxmKTsK",
       "headers": [
        [
         "content-type",
         "application/javascript"
        ]
       ],
       "ok": true,
       "status": 200,
       "status_text": "OK"
      }
     }
    },
    "id": "Cr49gZ6XV2XD",
    "outputId": "ea718ef5-0223-4f8d-8ab1-e3121b6bdd26"
   },
   "outputs": [],
   "source": [
    "from google.colab import files\n",
    "# Load the Ames Housing dataset (from Kaggle). files.upload() is Colab-specific;\n",
    "# the next cell expects 'train.csv' and 'test.csv' to be keys of `uploaded`.\n",
    "uploaded = files.upload()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "t0WY_4_0YyUY"
   },
   "outputs": [],
   "source": [
    "import io\n",
    "\n",
    "# Parse both uploaded payloads into DataFrames: labelled rows first, then the test rows.\n",
    "data_train, X_test = (\n",
    "    pd.read_csv(io.BytesIO(uploaded[csv_name]))\n",
    "    for csv_name in ('train.csv', 'test.csv')\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 34
    },
    "id": "jW3_o4YsVhqr",
    "outputId": "6130eace-3d50-42b0-ba98-8ab735d4d9f0"
   },
   "outputs": [],
   "source": [
    "# Prefer the GPU when CUDA is usable; otherwise fall back to the CPU.\n",
    "device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')\n",
    "device\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 34
    },
    "id": "d-8aXA5sUhSb",
    "outputId": "3c01af39-d193-43b9-9c3e-fec3bde48ba7"
   },
   "outputs": [],
   "source": [
    "# Stack the train and test rows so one-hot encoding produces the same columns for both.\n",
    "# DataFrame.append was removed in pandas 2.0; pd.concat is the supported equivalent.\n",
    "data = pd.concat([data_train, X_test], ignore_index=True, sort=False)\n",
    "# dummy_na=True adds an explicit NaN indicator column for each categorical feature.\n",
    "data = pd.get_dummies(data, dummy_na=True, drop_first=True)\n",
    "# Rebind instead of inplace=True so re-running the cell always starts from data_train.\n",
    "data = data.drop(columns='Id')\n",
    "# True means NaNs remain (presumably numeric gaps and the test rows' missing SalePrice).\n",
    "data.isnull().values.any()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 34
    },
    "id": "OVYcOMPZUhTD",
    "outputId": "e7a0ef95-ad5a-4a64-edfa-133624527967"
   },
   "outputs": [],
   "source": [
    "# Replace every remaining NaN with the median of its column.\n",
    "data = data.fillna(data.median())\n",
    "# Keep the column order and the target values for use by the scaling cell.\n",
    "columns, sale_price = data.columns, data['SalePrice']\n",
    "data.isnull().values.any()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 256
    },
    "id": "9fFTuYYcUhTf",
    "outputId": "fab8c116-a6f7-4d42-c05a-023a13fe7fc6"
   },
   "outputs": [],
   "source": [
    "scaler = MinMaxScaler()\n",
    "# Min-max scale every column, then restore the unscaled SalePrice captured in `sale_price`.\n",
    "data = pd.DataFrame(\n",
    "    scaler.fit_transform(data),\n",
    "    columns=columns,\n",
    ").assign(SalePrice=sale_price)\n",
    "data.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 104
    },
    "id": "K4d9EwWDUhT5",
    "outputId": "d70bfeba-852d-4b1e-8d66-f792a451064d"
   },
   "outputs": [],
   "source": [
    "# The combined frame lists the rows of data_train first, so split at its length\n",
    "# (this cell previously hard-coded 1460).\n",
    "n_train = len(data_train)\n",
    "train = data.iloc[:n_train]\n",
    "# Non-mutating drop: an inplace drop on an iloc slice triggers pandas'\n",
    "# SettingWithCopyWarning.\n",
    "test = data.iloc[n_train:].drop(columns='SalePrice')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "kj8Ogh0XUhUW"
   },
   "outputs": [],
   "source": [
    "# Hold out 20% of the labelled rows for validation; the fixed seed makes the split repeatable.\n",
    "X_train, X_val, y_train, y_val = train_test_split(\n",
    "    train.drop(columns='SalePrice'),\n",
    "    train['SalePrice'],\n",
    "    random_state=42,\n",
    "    test_size=0.2,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 34
    },
    "id": "frn91RfeUhUs",
    "outputId": "0c9a0786-dd1e-473e-bbde-33a325d6dff9"
   },
   "outputs": [],
   "source": [
    "# Display the shape of the training features.\n",
    "X_train.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 34
    },
    "id": "71yH_DfRUhU6",
    "outputId": "043e2971-f897-44cd-a934-9dca980fd7ff"
   },
   "outputs": [],
   "source": [
    "# Display the shape of the training targets.\n",
    "y_train.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "rGHePyDcUhVG"
   },
   "outputs": [],
   "source": [
    "# Hyperparameters: fixed sizes first, then the values derived from the training frame.\n",
    "batch_size, n_output, size_hidden = 1, 1, 100\n",
    "batch_no = len(X_train.index) // batch_size  # number of whole batches\n",
    "cols = len(X_train.columns)  # number of columns in the input matrix\n",
    "class Net(torch.nn.Module):\n",
    "    \"\"\"One-hidden-layer MLP: Linear, ReLU, Linear.\n",
    "\n",
    "    Args:\n",
    "        n_feature: number of input features per sample.\n",
    "        size_hidden: width of the hidden layer.\n",
    "        n_output: number of values predicted per sample.\n",
    "    \"\"\"\n",
    "\n",
    "    def __init__(self, n_feature, size_hidden, n_output):\n",
    "        super(Net, self).__init__()\n",
    "        # Size the first layer from the constructor argument rather than from the\n",
    "        # notebook-level `cols` global, so the class honours what the caller passes.\n",
    "        self.hidden = torch.nn.Linear(n_feature, size_hidden)   # hidden layer\n",
    "        self.predict = torch.nn.Linear(size_hidden, n_output)   # output layer\n",
    "\n",
    "    def forward(self, x):\n",
    "        \"\"\"Return the linear (un-activated) output for a batch whose last dim is n_feature.\"\"\"\n",
    "        hidden = F.relu(self.hidden(x))      # activation function for hidden layer\n",
    "        return self.predict(hidden)          # linear output\n",
    "#net = Net(cols, size_hidden, n_output)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "vN_1VMvcUhVV"
   },
   "outputs": [],
   "source": [
    "class Regressor(nn.Module):\n",
    "    \"\"\"Three-layer MLP (n_features, 72, 18, 1) with a ReLU after every layer.\n",
    "\n",
    "    Args:\n",
    "        n_features: number of input columns. Defaults to 288, the width this\n",
    "            notebook previously hard-coded; pass X_train.shape[1] when the\n",
    "            encoded data has a different width.\n",
    "    \"\"\"\n",
    "\n",
    "    def __init__(self, n_features=288):\n",
    "        super().__init__()\n",
    "        self.fc1 = nn.Linear(n_features, 72)\n",
    "        self.fc2 = nn.Linear(72, 18)\n",
    "        self.fc3 = nn.Linear(18, 1)\n",
    "        # Kept so the attribute stays available; forward() does not apply dropout.\n",
    "        self.dropout = nn.Dropout(p=0.1)\n",
    "\n",
    "    def forward(self, x):\n",
    "        \"\"\"Return non-negative predictions with a trailing dimension of size 1.\"\"\"\n",
    "        x = F.relu(self.fc1(x))\n",
    "        x = F.relu(self.fc2(x))\n",
    "        # NOTE(review): the final ReLU can emit exactly 0 and the training cells take\n",
    "        # torch.log of this output (-inf); consider softplus or a plain linear head.\n",
    "        return F.relu(self.fc3(x))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "5tAOC0swUhVp"
   },
   "outputs": [],
   "source": [
    "# One chunk per optimisation step. batch_no (len(X_train) // batch_size) replaces the\n",
    "# hard-coded 1168 so the batching follows the data and the batch_size hyperparameter.\n",
    "# Splitting row positions (not the DataFrame itself) avoids np.array_split's reliance\n",
    "# on the deprecated DataFrame.swapaxes.\n",
    "chunk_rows = np.array_split(np.arange(len(X_train)), batch_no)\n",
    "train_batch = [X_train.iloc[rows] for rows in chunk_rows]\n",
    "label_batch = [y_train.iloc[rows] for rows in chunk_rows]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "516pVGAlUhV1"
   },
   "outputs": [],
   "source": [
    "# Turn every pandas chunk into a float tensor, in place; labels become column vectors.\n",
    "train_batch[:] = [torch.from_numpy(chunk.values).float() for chunk in train_batch]\n",
    "label_batch[:] = [torch.from_numpy(chunk.values).float().view(-1, 1) for chunk in label_batch]\n",
    "\n",
    "# Same conversion for the validation split (this rebinds X_val / y_val to tensors).\n",
    "X_val, y_val = (\n",
    "    torch.from_numpy(X_val.values).float(),\n",
    "    torch.from_numpy(y_val.values).float().view(-1, 1),\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "jZvd2pDMUhWN"
   },
   "outputs": [],
   "source": [
    "#import torch\n",
    "from torch.optim.optimizer import Optimizer, required\n",
    "import math\n",
    "\n",
    "\n",
    "class SPPA(Optimizer):\n",
    "    \"\"\"Proximal-point style optimizer that re-linearises the loss in an inner loop.\n",
    "\n",
    "    For each parameter tensor the step anchors at the value it had on entry\n",
    "    (``previous``) and repeats ``inner_steps`` times::\n",
    "\n",
    "        p = previous - g * (loss - dot(g, p - previous)) / (1 / lr + dot(g, g))\n",
    "\n",
    "    with ``loss`` and the gradient ``g`` re-evaluated through ``closure``.\n",
    "\n",
    "    Args:\n",
    "        params: iterable of parameters or parameter-group dicts.\n",
    "        lr: base step size, used when ``weight_decay`` selects no schedule.\n",
    "        weight_decay: despite its name, selects the step-size schedule:\n",
    "            ``1`` gives ``1 / (10 * (e + 1))``, ``2`` gives\n",
    "            ``1 / (10 * sqrt(e + 1))``, a value strictly between 0 and 1 is used\n",
    "            directly as a constant step, anything else falls back to ``lr``.\n",
    "        inner_steps: number of inner iterations per parameter tensor.\n",
    "\n",
    "    Raises:\n",
    "        ValueError: if ``lr`` or ``weight_decay`` is negative, or ``inner_steps`` is below 1.\n",
    "    \"\"\"\n",
    "\n",
    "    def __init__(self, params, lr=required, weight_decay=0, inner_steps=10):\n",
    "        if lr is not required and lr < 0.0:\n",
    "            raise ValueError(\"Invalid learning rate: {}\".format(lr))\n",
    "        if weight_decay < 0.0:\n",
    "            raise ValueError(\"Invalid weight_decay value: {}\".format(weight_decay))\n",
    "        if inner_steps < 1:\n",
    "            raise ValueError(\"Invalid inner_steps value: {}\".format(inner_steps))\n",
    "\n",
    "        defaults = dict(lr=lr, weight_decay=weight_decay, inner_steps=inner_steps)\n",
    "        super(SPPA, self).__init__(params, defaults)\n",
    "\n",
    "    def __setstate__(self, state):\n",
    "        super(SPPA, self).__setstate__(state)\n",
    "\n",
    "    @staticmethod\n",
    "    def _scheduled_lr(group, e):\n",
    "        \"\"\"Return the step size for zero-based epoch ``e`` under the group's schedule.\"\"\"\n",
    "        schedule = group['weight_decay']\n",
    "        if schedule == 1:  # 1/t decay\n",
    "            return 1.0 / (10 * (e + 1))\n",
    "        if schedule == 2:  # 1/sqrt(t) decay\n",
    "            return 1.0 / (10 * math.sqrt(e + 1))\n",
    "        if 0 < schedule < 1:  # constant step supplied directly\n",
    "            return schedule\n",
    "        # No schedule selected (including the default 0): use the configured lr.\n",
    "        return group['lr']\n",
    "\n",
    "    @torch.no_grad()\n",
    "    def step(self, closure, e):\n",
    "        \"\"\"Performs a single optimization step.\n",
    "\n",
    "        Arguments:\n",
    "            closure (callable): re-evaluates the model, calls ``backward()`` and\n",
    "                returns ``(loss, output)``.\n",
    "            e (int): zero-based epoch index that drives the step-size schedule.\n",
    "\n",
    "        Returns:\n",
    "            The loss from the most recent ``closure`` call, or ``None`` when the\n",
    "            closure was never invoked (no parameter had a gradient).\n",
    "        \"\"\"\n",
    "        # Make sure the closure is always called with grad enabled\n",
    "        closure = torch.enable_grad()(closure)\n",
    "        last_loss = None\n",
    "        for group in self.param_groups:\n",
    "            # The schedule lives in the param group, not in a notebook global, and is\n",
    "            # constant for the whole step, so compute it once here.\n",
    "            lr = self._scheduled_lr(group, e)\n",
    "            inner_steps = group.get('inner_steps', 10)\n",
    "            for p in group['params']:\n",
    "                if p.grad is None:\n",
    "                    continue\n",
    "                # Snapshot the anchor; a plain `previous = p` would alias the parameter\n",
    "                # and make `p - previous` identically zero.\n",
    "                previous = p.detach().clone()\n",
    "\n",
    "                for _ in range(inner_steps):\n",
    "                    orig_loss, func_value = closure()\n",
    "                    last_loss = orig_loss\n",
    "                    # Read the gradient only after the closure ran: zero_grad() may\n",
    "                    # replace the .grad tensor instead of zeroing it in place.\n",
    "                    d_p = p.grad\n",
    "                    if d_p is None:\n",
    "                        continue\n",
    "                    # Skip when the model output is exactly zero (same guard as before,\n",
    "                    # extended to outputs with more than one element).\n",
    "                    if bool(torch.all(torch.as_tensor(func_value) == 0)):\n",
    "                        continue\n",
    "                    grad_flat = d_p.flatten()\n",
    "                    numerator = orig_loss - torch.dot(grad_flat, (p - previous).flatten())\n",
    "                    denominator = 1.0 / lr + torch.dot(grad_flat, grad_flat)\n",
    "                    scale = (numerator / denominator).item()\n",
    "                    p.copy_(torch.add(previous, d_p, alpha=-scale))\n",
    "\n",
    "        return last_loss"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 52
    },
    "id": "yfvEsMkbfvGy",
    "outputId": "9abd46e8-46de-4ca2-f1c5-489e1a90eb47"
   },
   "outputs": [],
   "source": [
    "# Disabled experiment kept as a bare string literal: nothing inside it is executed.\n",
    "# NOTE(review): it refers to `Regressor.saga_backward` and `trainloader`, neither of which\n",
    "# is defined in the cells visible here -- confirm they exist before re-enabling.\n",
    "'''\n",
    "\n",
    "#Define constants\n",
    "learning_rate = 0.01\n",
    "criterion = nn.MSELoss()\n",
    "n_samples = len(train_batch)\n",
    "n_epoch = 100\n",
    "random_iter = np.random.randint(0, n_samples, n_samples * n_epoch)\n",
    "\n",
    "full_loss_epoch, grad_norm_epoch = Regressor.saga_backward(trainloader, learning_rate, criterion, n_samples, random_iter, n_epoch)\n",
    "'''"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 1000
    },
    "id": "IvRzLWTGUhWX",
    "outputId": "ccedffc0-90fa-4289-d132-5fce597be625"
   },
   "outputs": [],
   "source": [
    "# Train a freshly seeded Regressor with SPPA, schedule selector 1.\n",
    "torch.manual_seed(2)\n",
    "model = Regressor()\n",
    "model.to(device)\n",
    "\n",
    "criterion = nn.MSELoss()\n",
    "# Keep the notebook-level `weight_decay` defined: the SPPA.step implementation in this\n",
    "# notebook may read it as a global, so it must match the constructor argument.\n",
    "weight_decay = 1\n",
    "optimizer = SPPA(model.parameters(), lr=0.1, weight_decay=weight_decay)\n",
    "epochs = 100\n",
    "\n",
    "train_losses_SPPA = []\n",
    "\n",
    "for e in range(epochs):\n",
    "    model.train()\n",
    "    train_loss = 0\n",
    "    for i in range(len(train_batch)):\n",
    "        # Follow `device` instead of calling .cuda(), so the cell also runs on CPU-only\n",
    "        # hosts; the shared batch lists are left untouched for the later cells.\n",
    "        inputs = train_batch[i].to(device)\n",
    "        targets = label_batch[i].to(device)\n",
    "\n",
    "        def closure():\n",
    "            \"\"\"Recompute the log-space MSE and its gradients for the current batch.\"\"\"\n",
    "            optimizer.zero_grad()\n",
    "            output = model(inputs)\n",
    "            loss = criterion(torch.log(output), torch.log(targets))\n",
    "            loss.backward()\n",
    "            return loss, output\n",
    "\n",
    "        optimizer.zero_grad()\n",
    "        output = model(inputs)\n",
    "        loss = criterion(torch.log(output), torch.log(targets))\n",
    "        if i == 0 and e == 0:  # record the loss before any update\n",
    "            train_losses_SPPA.append(loss.item())\n",
    "            print('Initial loss ', train_losses_SPPA[0])\n",
    "        loss.backward()\n",
    "        optimizer.step(closure, e)\n",
    "\n",
    "        train_loss += loss.item()\n",
    "\n",
    "    # Mean per-batch loss of this epoch (measured before each update).\n",
    "    train_losses_SPPA.append(train_loss / len(train_batch))\n",
    "\n",
    "    print(\"Epoch: {}/{}.. \".format(e+1, epochs),\n",
    "          \"Training Loss: {:.3f}.. \".format(train_loss/len(train_batch)),\n",
    "          )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 1000
    },
    "id": "vggzNZC1C_ne",
    "outputId": "8befefcc-3c46-4fc4-d3e9-58adc9d116a4"
   },
   "outputs": [],
   "source": [
    "# Train a freshly seeded Regressor with SPPA, schedule selector 2.\n",
    "torch.manual_seed(2)\n",
    "model = Regressor()\n",
    "model.to(device)\n",
    "\n",
    "criterion = nn.MSELoss()\n",
    "# Keep the notebook-level `weight_decay` defined: the SPPA.step implementation in this\n",
    "# notebook may read it as a global, so it must match the constructor argument.\n",
    "weight_decay = 2\n",
    "optimizer = SPPA(model.parameters(), lr=0.1, weight_decay=weight_decay)\n",
    "epochs = 100\n",
    "\n",
    "train_losses_SPPA2 = []\n",
    "\n",
    "for e in range(epochs):\n",
    "    model.train()\n",
    "    train_loss = 0\n",
    "    for i in range(len(train_batch)):\n",
    "        # Follow `device` instead of calling .cuda(), so the cell also runs on CPU-only\n",
    "        # hosts; the shared batch lists are left untouched for the later cells.\n",
    "        inputs = train_batch[i].to(device)\n",
    "        targets = label_batch[i].to(device)\n",
    "\n",
    "        def closure():\n",
    "            \"\"\"Recompute the log-space MSE and its gradients for the current batch.\"\"\"\n",
    "            optimizer.zero_grad()\n",
    "            output = model(inputs)\n",
    "            loss = criterion(torch.log(output), torch.log(targets))\n",
    "            loss.backward()\n",
    "            return loss, output\n",
    "\n",
    "        optimizer.zero_grad()\n",
    "        output = model(inputs)\n",
    "        loss = criterion(torch.log(output), torch.log(targets))\n",
    "        if i == 0 and e == 0:  # record the loss before any update\n",
    "            train_losses_SPPA2.append(loss.item())\n",
    "            print('Initial loss ', train_losses_SPPA2[0])\n",
    "        loss.backward()\n",
    "        optimizer.step(closure, e)\n",
    "\n",
    "        train_loss += loss.item()\n",
    "\n",
    "    # Mean per-batch loss of this epoch (measured before each update).\n",
    "    train_losses_SPPA2.append(train_loss / len(train_batch))\n",
    "\n",
    "    print(\"Epoch: {}/{}.. \".format(e+1, epochs),\n",
    "          \"Training Loss: {:.3f}.. \".format(train_loss/len(train_batch)),\n",
    "          )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "fWd1sdBgBsMY"
   },
   "outputs": [],
   "source": [
    "# Train a freshly seeded Regressor with SPPA using a constant step of 0.01.\n",
    "torch.manual_seed(2)\n",
    "model = Regressor()\n",
    "model.to(device)\n",
    "\n",
    "criterion = nn.MSELoss()\n",
    "# Keep the notebook-level `weight_decay` defined: the SPPA.step implementation in this\n",
    "# notebook may read it as a global, so it must match the constructor argument.\n",
    "weight_decay = 0.01\n",
    "optimizer = SPPA(model.parameters(), lr=0.01, weight_decay=weight_decay)\n",
    "epochs = 100\n",
    "\n",
    "train_losses_SPPA3 = []\n",
    "\n",
    "for e in range(epochs):\n",
    "    model.train()\n",
    "    train_loss = 0\n",
    "    for i in range(len(train_batch)):\n",
    "        # Follow `device` instead of calling .cuda(), so the cell also runs on CPU-only\n",
    "        # hosts; the shared batch lists are left untouched for the later cells.\n",
    "        inputs = train_batch[i].to(device)\n",
    "        targets = label_batch[i].to(device)\n",
    "\n",
    "        def closure():\n",
    "            \"\"\"Recompute the log-space MSE and its gradients for the current batch.\"\"\"\n",
    "            optimizer.zero_grad()\n",
    "            output = model(inputs)\n",
    "            loss = criterion(torch.log(output), torch.log(targets))\n",
    "            loss.backward()\n",
    "            return loss, output\n",
    "\n",
    "        optimizer.zero_grad()\n",
    "        output = model(inputs)\n",
    "        loss = criterion(torch.log(output), torch.log(targets))\n",
    "        if i == 0 and e == 0:  # record the loss before any update\n",
    "            train_losses_SPPA3.append(loss.item())\n",
    "            print('Initial loss ', train_losses_SPPA3[0])\n",
    "        loss.backward()\n",
    "        optimizer.step(closure, e)\n",
    "\n",
    "        train_loss += loss.item()\n",
    "\n",
    "    # Mean per-batch loss of this epoch (measured before each update).\n",
    "    train_losses_SPPA3.append(train_loss / len(train_batch))\n",
    "\n",
    "    print(\"Epoch: {}/{}.. \".format(e+1, epochs),\n",
    "          \"Training Loss: {:.3f}.. \".format(train_loss/len(train_batch)),\n",
    "          )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "fhz9DNi4UhWg"
   },
   "outputs": [],
   "source": [
    "# Display how many batches the training set was split into.\n",
    "len(train_batch)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "KHX_EunVUhWp"
   },
   "outputs": [],
   "source": [
    "# Display the recorded losses of the first SPPA run (initial loss, then one per epoch).\n",
    "train_losses_SPPA"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "tJ3TbJPaUhWx"
   },
   "outputs": [],
   "source": [
    "# Baseline: SGD with momentum on a freshly seeded Regressor that stays on the CPU.\n",
    "torch.manual_seed(2)\n",
    "model = Regressor()\n",
    "criterion = nn.MSELoss()\n",
    "optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9)\n",
    "epochs = 100\n",
    "\n",
    "train_losses = []\n",
    "\n",
    "for e in range(epochs):\n",
    "    model.train()\n",
    "    train_loss = 0\n",
    "    for i in range(len(train_batch)):\n",
    "        # Bring the batch back to the CPU in place (earlier cells may have moved it).\n",
    "        train_batch[i], label_batch[i] = train_batch[i].cpu(), label_batch[i].cpu()\n",
    "        optimizer.zero_grad()\n",
    "        output = model(train_batch[i])\n",
    "        loss = criterion(torch.log(output), torch.log(label_batch[i]))\n",
    "        if e == 0 and i == 0:  # append the initial loss\n",
    "            train_losses.append(loss.item())\n",
    "            print('Initial loss ', train_losses[0])\n",
    "\n",
    "        loss.backward()\n",
    "        optimizer.step()\n",
    "\n",
    "        train_loss += loss.item()\n",
    "\n",
    "    test_loss = accuracy = 0\n",
    "    # Validation is disabled; the intended evaluation was:\n",
    "    #   with torch.no_grad():\n",
    "    #       model.eval()\n",
    "    #       predictions = model(X_val)\n",
    "    #       test_loss += torch.sqrt(criterion(torch.log(predictions), torch.log(y_val)))\n",
    "    train_losses.append(train_loss / len(train_batch))\n",
    "\n",
    "    print(\"Epoch: {}/{}.. \".format(e+1, epochs),\n",
    "          \"Training Loss: {:.3f}.. \".format(train_losses[-1]),\n",
    "          )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "Uvvf2z3SoTBd"
   },
   "outputs": [],
   "source": [
    "# Baseline: plain SGD (no momentum) on a freshly seeded Regressor that stays on the CPU.\n",
    "torch.manual_seed(2)\n",
    "model = Regressor()\n",
    "criterion = nn.MSELoss()\n",
    "optimizer = torch.optim.SGD(model.parameters(), lr=0.001)\n",
    "epochs = 100\n",
    "\n",
    "train_losses_sgd = []\n",
    "\n",
    "for e in range(epochs):\n",
    "    model.train()\n",
    "    train_loss = 0\n",
    "    for i in range(len(train_batch)):\n",
    "        # Bring the batch back to the CPU in place (earlier cells may have moved it).\n",
    "        train_batch[i], label_batch[i] = train_batch[i].cpu(), label_batch[i].cpu()\n",
    "        optimizer.zero_grad()\n",
    "        output = model(train_batch[i])\n",
    "        loss = criterion(torch.log(output), torch.log(label_batch[i]))\n",
    "        if e == 0 and i == 0:  # append the initial loss\n",
    "            train_losses_sgd.append(loss.item())\n",
    "            print('Initial loss ', train_losses_sgd[0])\n",
    "\n",
    "        loss.backward()\n",
    "        optimizer.step()\n",
    "\n",
    "        train_loss += loss.item()\n",
    "\n",
    "    test_loss = accuracy = 0\n",
    "    # Validation is disabled; the intended evaluation was:\n",
    "    #   with torch.no_grad():\n",
    "    #       model.eval()\n",
    "    #       predictions = model(X_val)\n",
    "    #       test_loss += torch.sqrt(criterion(torch.log(predictions), torch.log(y_val)))\n",
    "    train_losses_sgd.append(train_loss / len(train_batch))\n",
    "\n",
    "    print(\"Epoch: {}/{}.. \".format(e+1, epochs),\n",
    "          \"Training Loss: {:.3f}.. \".format(train_losses_sgd[-1]),\n",
    "          )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "LYRWU_CEUhW_"
   },
   "outputs": [],
   "source": [
    "# Adam run, minimising the MSE between log-predictions and log-targets.\n",
    "torch.manual_seed(2)  # re-seed right before the model is built\n",
    "model_adam = Regressor()\n",
    "criterion = nn.MSELoss()\n",
    "optimizer = optim.Adam(model_adam.parameters(), lr=0.001)\n",
    "epochs = 100\n",
    "\n",
    "# CPU views of the batches, built once. Tensor.cpu() returns the tensor itself\n",
    "# when it already lives on the CPU, so nothing is copied in that case.\n",
    "# NOTE(review): without this step the cell appears to rely on the plain-SGD\n",
    "# cell having moved the shared batches to the CPU beforehand -- confirm.\n",
    "adam_batches = [\n",
    "    (train_batch[i].cpu(), label_batch[i].cpu()) for i in range(len(train_batch))\n",
    "]\n",
    "\n",
    "# Entry 0 is the loss of the very first batch before any parameter update;\n",
    "# every following entry is the mean per-batch loss of one epoch.\n",
    "train_losses_adam = []\n",
    "\n",
    "for e in range(epochs):\n",
    "    model_adam.train()\n",
    "    train_loss = 0\n",
    "    for i, (batch_x, batch_y) in enumerate(adam_batches):\n",
    "        optimizer.zero_grad()\n",
    "        output = model_adam(batch_x)\n",
    "        # NOTE(review): torch.log needs strictly positive predictions and\n",
    "        # targets to stay finite -- confirm the model/data guarantee this.\n",
    "        loss = criterion(torch.log(output), torch.log(batch_y))\n",
    "        if e == 0 and i == 0:\n",
    "            # Starting point of the curve, recorded before the first step.\n",
    "            initial_loss = loss.item()\n",
    "            train_losses_adam.append(initial_loss)\n",
    "            print('Initial loss ', initial_loss)\n",
    "\n",
    "        loss.backward()\n",
    "        optimizer.step()\n",
    "        train_loss += loss.item()\n",
    "\n",
    "    # Per-epoch bookkeeping, executed once the batch loop has finished.\n",
    "    epoch_loss = train_loss / len(train_batch)\n",
    "    train_losses_adam.append(epoch_loss)\n",
    "    print(\"Epoch: {}/{}.. \".format(e+1, epochs),\n",
    "          \"Training Loss: {:.3f}.. \".format(epoch_loss),\n",
    "          )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "f4XF5KuZmyf6"
   },
   "outputs": [],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "from google.colab import files\n",
    "\n",
    "# Optimizer comparison restricted to the first 21 entries of each loss history.\n",
    "n_points = 21\n",
    "out_png = \"AmesSPPALr1divsqrttFirst21_2_GN2.png\"\n",
    "\n",
    "# (loss history, legend label, line colour), listed in drawing order.\n",
    "curves = [\n",
    "    (train_losses_SPPA, \"SPPA-GN (1/sqrt(t))\", \"red\"),\n",
    "    (train_losses_adam, \"Adam\", \"darkorange\"),\n",
    "    (train_losses, \"SGD with Momentum \", \"blue\"),\n",
    "    (train_losses_sgd, \"SGD\", \"green\"),\n",
    "]\n",
    "\n",
    "# The shared x axis is sized from the (truncated) SPPA history.\n",
    "x_axis = list(range(len(train_losses_SPPA[:n_points])))\n",
    "\n",
    "plt.figure(figsize=(12, 9))\n",
    "for history, legend_name, line_color in curves:\n",
    "    plt.plot(x_axis, history[:n_points], label=legend_name, linewidth=2.5, color=line_color)\n",
    "\n",
    "plt.xticks(fontsize=26)\n",
    "plt.yticks(fontsize=26)\n",
    "plt.title(\"Ames Housing Dataset \\n Regression\", fontsize=26)\n",
    "plt.xlabel(\"Epoch (Batch Size 1) \", fontsize=26)\n",
    "plt.ylabel(\"MSE Loss\", fontsize=26)\n",
    "plt.grid(linestyle=\"--\")\n",
    "plt.legend(fontsize=18)\n",
    "plt.tight_layout()\n",
    "\n",
    "# Save the figure, then pass the saved file to google.colab's download helper.\n",
    "plt.savefig(out_png)\n",
    "files.download(out_png)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "NRGHuJr_nwJA"
   },
   "outputs": [],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "from google.colab import files\n",
    "\n",
    "# Zoomed-in optimizer comparison: only the first 6 entries of each loss history.\n",
    "head = slice(None, 6)\n",
    "figure_file = \"AmesSPPALr1divsqrttFirst5_2GN2.png\"\n",
    "\n",
    "# Parallel tuples describing the four curves, in drawing order.\n",
    "histories = (train_losses_SPPA, train_losses_adam, train_losses, train_losses_sgd)\n",
    "legend_names = (\"SPPA-GN (1/sqrt(t))\", \"Adam\", \"SGD with Momentum \", \"SGD\")\n",
    "line_colors = (\"red\", \"darkorange\", \"blue\", \"green\")\n",
    "\n",
    "# The shared x axis is sized from the (truncated) SPPA history.\n",
    "steps = [k for k in range(len(train_losses_SPPA[head]))]\n",
    "\n",
    "plt.figure(figsize=(12, 9))\n",
    "for losses, legend_name, line_color in zip(histories, legend_names, line_colors):\n",
    "    plt.plot(steps, losses[head], label=legend_name, linewidth=2.5, color=line_color)\n",
    "\n",
    "plt.xticks(fontsize=26)\n",
    "plt.yticks(fontsize=26)\n",
    "plt.title(\"Ames Housing Dataset \\n Regression\", fontsize=26)\n",
    "plt.xlabel(\"Epoch (Batch Size 1) \", fontsize=26)\n",
    "plt.ylabel(\"MSE Loss\", fontsize=26)\n",
    "plt.grid(linestyle=\"--\")\n",
    "plt.legend(fontsize=18)\n",
    "plt.tight_layout()\n",
    "\n",
    "# Save the figure, then pass the saved file to google.colab's download helper.\n",
    "plt.savefig(figure_file)\n",
    "files.download(figure_file)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "AeTYHrpsn9V6"
   },
   "outputs": [],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "from google.colab import files\n",
    "\n",
    "# Optimizer comparison over the complete loss histories (no truncation).\n",
    "target_png = \"AmesSPPALr1divsqrttALL2GN2.png\"\n",
    "\n",
    "# Legend label -> (loss history, line colour); dict order is the drawing order.\n",
    "runs = {\n",
    "    \"SPPA-GN (1/sqrt(t))\": (train_losses_SPPA, \"red\"),\n",
    "    \"Adam\": (train_losses_adam, \"darkorange\"),\n",
    "    \"SGD with Momentum \": (train_losses, \"blue\"),\n",
    "    \"SGD\": (train_losses_sgd, \"green\"),\n",
    "}\n",
    "\n",
    "# The shared x axis has one point per entry of the SPPA history.\n",
    "ticks = list(range(len(train_losses_SPPA)))\n",
    "\n",
    "plt.figure(figsize=(12, 9))\n",
    "for legend_name, (losses, line_color) in runs.items():\n",
    "    plt.plot(ticks, losses, label=legend_name, linewidth=2.5, color=line_color)\n",
    "\n",
    "plt.xticks(fontsize=26)\n",
    "plt.yticks(fontsize=26)\n",
    "plt.title(\"Ames Housing Dataset \\n Regression\", fontsize=26)\n",
    "plt.xlabel(\"Epoch (Batch Size 1) \", fontsize=26)\n",
    "plt.ylabel(\"MSE Loss\", fontsize=26)\n",
    "plt.grid(linestyle=\"--\")\n",
    "plt.legend(fontsize=18)\n",
    "plt.tight_layout()\n",
    "\n",
    "# Save the figure, then pass the saved file to google.colab's download helper.\n",
    "plt.savefig(target_png)\n",
    "files.download(target_png)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "slEtoCNarz8K"
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "accelerator": "GPU",
  "colab": {
   "collapsed_sections": [],
   "name": "Supplementary ICLR2021 Ames",
   "provenance": []
  },
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
