{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "import torch\n",
    "\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "from sweeps import sweep_learning_rates\n",
    "from networks import ReluLayer"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "pycharm": {
     "name": "#%% md\n"
    }
   },
   "source": [
    "Sweep a range of network capacities to find a sufficiently parametrized (but not overparametrized)\n",
    "ReLU network. The network stacks two of the single-layer ReLU nets, with a hidden activation\n",
    "of dimension $k$ between them."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "pycharm": {
     "name": "#%% md\n"
    }
   },
   "source": [
    "IMPORTANT NOTE: Unlike the single-layer version, here the width is $k$, not `k*k`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "# Learning rates tried for every candidate width k.\n",
    "learning_rates = [5e-3, 1e-3, 5e-4, 1e-4, 5e-5]\n",
    "candidate_widths = [5, 10, 20, 30, 40, 50, 60, 70, 80, 100, 120]\n",
    "\n",
    "# Maps k -> {'mean': tensor, 'var': tensor} built from what the sweep returns\n",
    "# (presumably one entry per learning rate -- confirm against sweep_learning_rates).\n",
    "final_losses = {}\n",
    "for k in candidate_widths:\n",
    "    print(f'------\\nk = {k}\\n------')\n",
    "\n",
    "    # k is used both as the width between the two layers and as each layer's pre-activation width.\n",
    "    pre_activation_width = k\n",
    "    width_between_layers = k\n",
    "\n",
    "    def model_init():\n",
    "        # Builds a fresh two-layer stack on every call, using the current k.\n",
    "        first_layer = ReluLayer(1, width_between_layers, pre_activation_width)\n",
    "        second_layer = ReluLayer(width_between_layers, 1, pre_activation_width)\n",
    "        return torch.nn.Sequential(first_layer, second_layer)\n",
    "\n",
    "    sweep_settings = dict(\n",
    "        model_init=model_init,\n",
    "        training_iterations=20000,\n",
    "        num_means=1,\n",
    "        diff=0,\n",
    "        bound=1,\n",
    "        test_size=100,\n",
    "        learning_rates=learning_rates,\n",
    "        measurement_interval=100,\n",
    "        final_intervals_to_average=5,\n",
    "        test_grid=False,\n",
    "    )\n",
    "    loss_means, loss_vars = sweep_learning_rates(**sweep_settings)\n",
    "    final_losses[k] = {'mean': torch.tensor(loss_means), 'var': torch.tensor(loss_vars)}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "# Plot the final loss against learning rate, one curve per width k, with a\n",
    "# shaded band of mean -/+ var around each curve.\n",
    "cm = plt.get_cmap('inferno')\n",
    "highlight_k = 40  # this width is drawn with a thicker line\n",
    "num_curves = len(final_losses)\n",
    "for i, (k, losses) in enumerate(final_losses.items()):\n",
    "    means = losses['mean']\n",
    "    variances = losses['var']  # named to avoid shadowing the builtin vars()\n",
    "    # Earlier (smaller) k values take colours from the bright end of the colormap.\n",
    "    color = cm(1 - i / num_curves)\n",
    "    width = 4 if k == highlight_k else 1.5\n",
    "    plt.plot(learning_rates, means, linewidth=width, label=f'k={k}', color=color)\n",
    "    plt.fill_between(learning_rates, means - variances, means + variances, alpha=0.3, color=color)\n",
    "\n",
    "# Axis scales, labels and the legend apply to the whole figure, so set them once\n",
    "# after all curves are drawn instead of on every loop iteration.\n",
    "plt.yscale('log')\n",
    "plt.xscale('log')\n",
    "plt.xlabel('learning rate')\n",
    "plt.ylabel('final loss')\n",
    "plt.legend();"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "For the right learning rate, k=40 seems to \"strike bottom\" first.  Run the sweep again to see whether this repeats."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Learning rates tried for every candidate width k.\n",
    "learning_rates = [5e-3, 1e-3, 5e-4, 1e-4, 5e-5]\n",
    "candidate_widths = [5, 10, 20, 30, 40, 50, 60, 70, 80, 100, 120]\n",
    "\n",
    "# Maps k -> {'mean': tensor, 'var': tensor} built from what the sweep returns\n",
    "# (presumably one entry per learning rate -- confirm against sweep_learning_rates).\n",
    "final_losses = {}\n",
    "for k in candidate_widths:\n",
    "    print(f'------\\nk = {k}\\n------')\n",
    "\n",
    "    # k is used both as the width between the two layers and as each layer's pre-activation width.\n",
    "    pre_activation_width = k\n",
    "    width_between_layers = k\n",
    "\n",
    "    def model_init():\n",
    "        # Builds a fresh two-layer stack on every call, using the current k.\n",
    "        first_layer = ReluLayer(1, width_between_layers, pre_activation_width)\n",
    "        second_layer = ReluLayer(width_between_layers, 1, pre_activation_width)\n",
    "        return torch.nn.Sequential(first_layer, second_layer)\n",
    "\n",
    "    sweep_settings = dict(\n",
    "        model_init=model_init,\n",
    "        training_iterations=20000,\n",
    "        num_means=1,\n",
    "        diff=0,\n",
    "        bound=1,\n",
    "        test_size=100,\n",
    "        learning_rates=learning_rates,\n",
    "        measurement_interval=100,\n",
    "        final_intervals_to_average=5,\n",
    "        test_grid=False,\n",
    "    )\n",
    "    loss_means, loss_vars = sweep_learning_rates(**sweep_settings)\n",
    "    final_losses[k] = {'mean': torch.tensor(loss_means), 'var': torch.tensor(loss_vars)}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Plot the final loss against learning rate, one curve per width k, with a\n",
    "# shaded band of mean -/+ var around each curve.\n",
    "cm = plt.get_cmap('inferno')\n",
    "highlight_k = 50  # this width is drawn with a thicker line\n",
    "num_curves = len(final_losses)\n",
    "for i, (k, losses) in enumerate(final_losses.items()):\n",
    "    means = losses['mean']\n",
    "    variances = losses['var']  # named to avoid shadowing the builtin vars()\n",
    "    # Earlier (smaller) k values take colours from the bright end of the colormap.\n",
    "    color = cm(1 - i / num_curves)\n",
    "    width = 4 if k == highlight_k else 1.5\n",
    "    plt.plot(learning_rates, means, linewidth=width, label=f'k={k}', color=color)\n",
    "    plt.fill_between(learning_rates, means - variances, means + variances, alpha=0.3, color=color)\n",
    "\n",
    "# Axis scales, labels and the legend apply to the whole figure, so set them once\n",
    "# after all curves are drawn instead of on every loop iteration.\n",
    "plt.yscale('log')\n",
    "plt.xscale('log')\n",
    "plt.xlabel('learning rate')\n",
    "plt.ylabel('final loss')\n",
    "plt.legend();"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Here k=50 struck bottom first instead of k=40; the two are similar in size, which is nice.  Run once more to see whether this repeats:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Learning rates tried for every candidate width k.\n",
    "learning_rates = [5e-3, 1e-3, 5e-4, 1e-4, 5e-5]\n",
    "candidate_widths = [5, 10, 20, 30, 40, 50, 60, 70, 80, 100, 120]\n",
    "\n",
    "# Maps k -> {'mean': tensor, 'var': tensor} built from what the sweep returns\n",
    "# (presumably one entry per learning rate -- confirm against sweep_learning_rates).\n",
    "final_losses = {}\n",
    "for k in candidate_widths:\n",
    "    print(f'------\\nk = {k}\\n------')\n",
    "\n",
    "    # k is used both as the width between the two layers and as each layer's pre-activation width.\n",
    "    pre_activation_width = k\n",
    "    width_between_layers = k\n",
    "\n",
    "    def model_init():\n",
    "        # Builds a fresh two-layer stack on every call, using the current k.\n",
    "        first_layer = ReluLayer(1, width_between_layers, pre_activation_width)\n",
    "        second_layer = ReluLayer(width_between_layers, 1, pre_activation_width)\n",
    "        return torch.nn.Sequential(first_layer, second_layer)\n",
    "\n",
    "    sweep_settings = dict(\n",
    "        model_init=model_init,\n",
    "        training_iterations=20000,\n",
    "        num_means=1,\n",
    "        diff=0,\n",
    "        bound=1,\n",
    "        test_size=100,\n",
    "        learning_rates=learning_rates,\n",
    "        measurement_interval=100,\n",
    "        final_intervals_to_average=5,\n",
    "        test_grid=False,\n",
    "    )\n",
    "    loss_means, loss_vars = sweep_learning_rates(**sweep_settings)\n",
    "    final_losses[k] = {'mean': torch.tensor(loss_means), 'var': torch.tensor(loss_vars)}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Plot the final loss against learning rate, one curve per width k, with a\n",
    "# shaded band of mean -/+ var around each curve.\n",
    "cm = plt.get_cmap('inferno')\n",
    "highlight_k = 40  # this width is drawn with a thicker line\n",
    "num_curves = len(final_losses)\n",
    "for i, (k, losses) in enumerate(final_losses.items()):\n",
    "    means = losses['mean']\n",
    "    variances = losses['var']  # named to avoid shadowing the builtin vars()\n",
    "    # Earlier (smaller) k values take colours from the bright end of the colormap.\n",
    "    color = cm(1 - i / num_curves)\n",
    "    width = 4 if k == highlight_k else 1.5\n",
    "    plt.plot(learning_rates, means, linewidth=width, label=f'k={k}', color=color)\n",
    "    plt.fill_between(learning_rates, means - variances, means + variances, alpha=0.3, color=color)\n",
    "\n",
    "# Axis scales, labels and the legend apply to the whole figure, so set them once\n",
    "# after all curves are drawn instead of on every loop iteration.\n",
    "plt.yscale('log')\n",
    "plt.xscale('log')\n",
    "plt.xlabel('learning rate')\n",
    "plt.ylabel('final loss')\n",
    "plt.legend();"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "k=40 again, as in the first run.  k=50 looks good as well. We'll choose k=50 to be sure the ReLU network has a fair fight."
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
