{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "ename": "ModuleNotFoundError",
     "evalue": "No module named 'tensorflow'",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mModuleNotFoundError\u001b[0m                       Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-2-3cde90fefb39>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m      5\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mnumpy\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mloadtxt\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      6\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mnumpy\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0msavetxt\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 7\u001b[0;31m \u001b[0;32mimport\u001b[0m \u001b[0mtensorflow\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mtf\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m      8\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mkeras\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmodels\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mSequential\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      9\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mkeras\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlayers\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mDense\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'tensorflow'"
     ]
    }
   ],
   "source": [
    "#Load required libraries\n",
    "#import pandas\n",
    "import random\n",
    "import numpy as np\n",
    "from numpy import loadtxt\n",
    "from numpy import savetxt\n",
    "import tensorflow as tf\n",
    "from keras.models import Sequential\n",
    "from keras.layers import Dense\n",
    "from keras.wrappers.scikit_learn import KerasRegressor\n",
    "from sklearn.model_selection import cross_validate\n",
    "from sklearn.model_selection import KFold\n",
    "from sklearn.preprocessing import StandardScaler\n",
    "from sklearn.pipeline import Pipeline\n",
    "import matplotlib.pyplot as plt\n",
    "from matplotlib.patches import Patch\n",
    "from matplotlib.lines import Line2D"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Read the comma-separated input matrix X and the target matrix Y\n",
    "# (Y holds one class function per column)\n",
    "X = np.loadtxt(\"X1400\", delimiter=\",\")\n",
    "Y = np.loadtxt(\"100GaussClassfunctions1400.csv\", delimiter=\",\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "# The neural network sees only half of the linear-regression input alphabet:\n",
    "# the last column of X followed by its first 49 columns (50 columns in total)\n",
    "nn_columns = [-1] + list(range(49))\n",
    "Xnn = X[:, nn_columns]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Neural network with three hidden layers\n",
    "def NN3_model():\n",
    "    \"\"\"Build and compile the SELU network that takes the 50-column input.\"\"\"\n",
    "    def selu_layer(width, **extra):\n",
    "        # Every layer shares the initializer, the L2 weight penalty and the activation\n",
    "        return Dense(width, kernel_initializer='normal',\n",
    "                     kernel_regularizer=tf.keras.regularizers.l2(1e-3),\n",
    "                     activation='selu', **extra)\n",
    "\n",
    "    network = Sequential()\n",
    "    network.add(selu_layer(180, input_dim=50))\n",
    "    for width in (120, 50, 1):\n",
    "        network.add(selu_layer(width))\n",
    "    # Mean squared error minimised with the Adamax optimizer\n",
    "    network.compile(loss='mean_squared_error', optimizer='Adamax')\n",
    "    return network\n",
    "\n",
    "NN3_model().summary()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 10-fold cross-validation setup for NN3: standardize the inputs, then fit the Keras regressor\n",
    "nn3_regressor = KerasRegressor(build_fn=NN3_model, epochs=200, batch_size=100, verbose=1)\n",
    "estimator = [('standardize', StandardScaler()), ('mlp', nn3_regressor)]\n",
    "pipeline = Pipeline(estimator)\n",
    "kfold = KFold(n_splits=10)\n",
    "# The scorer is the negated mean squared error\n",
    "scoring = {'MSE': 'neg_mean_squared_error'}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Start from a 10 x 0 array; each class function contributes one column of fold scores\n",
    "NN3losses = np.empty((10, 0))\n",
    "\n",
    "# Cross-validate the pipeline on every class function in turn\n",
    "for k in range(100):\n",
    "    scoreNN3 = cross_validate(\n",
    "        pipeline, Xnn, Y[:, k],\n",
    "        cv=kfold, scoring=scoring,\n",
    "        return_train_score=True, return_estimator=True,\n",
    "    )\n",
    "    TempLossNN3 = scoreNN3['test_MSE']\n",
    "    # Stack the 10 fold scores for this class function as a new column\n",
    "    NN3losses = np.hstack((NN3losses, TempLossNN3.reshape((10, 1))))\n",
    "    # Rewrite the results file after every class function\n",
    "    savetxt('NN3losses.csv', NN3losses, delimiter=\",\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Linear regression expressed as a single Dense unit\n",
    "def Linear_model():\n",
    "    \"\"\"Build and compile a one-unit model without activation on the 100-column input.\"\"\"\n",
    "    output_unit = Dense(1, input_dim=100, kernel_initializer='normal')\n",
    "    regression = Sequential()\n",
    "    regression.add(output_unit)\n",
    "    # Mean squared error minimised with the Adam optimizer\n",
    "    regression.compile(optimizer='Adam', loss='mean_squared_error')\n",
    "    return regression\n",
    "\n",
    "Linear_model().summary()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 10-fold cross-validation setup for the linear model: standardize the inputs, then fit the Keras regressor\n",
    "linear_regressor = KerasRegressor(build_fn=Linear_model, epochs=200, batch_size=100, verbose=1)\n",
    "estimator = [('standardize', StandardScaler()), ('mlp', linear_regressor)]\n",
    "pipeline = Pipeline(estimator)\n",
    "kfold = KFold(n_splits=10)\n",
    "# The scorer is the negated mean squared error\n",
    "scoring = {'MSE': 'neg_mean_squared_error'}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Start from a 10 x 0 array; each class function contributes one column of fold scores\n",
    "Linearlosses = np.empty((10, 0))\n",
    "\n",
    "# Cross-validate the pipeline on every class function in turn\n",
    "for k in range(100):\n",
    "    scoreLinear = cross_validate(\n",
    "        pipeline, X, Y[:, k],\n",
    "        cv=kfold, scoring=scoring,\n",
    "        return_train_score=True, return_estimator=True,\n",
    "    )\n",
    "    TempLossLinear = scoreLinear['test_MSE']\n",
    "    # Stack the 10 fold scores for this class function as a new column\n",
    "    Linearlosses = np.hstack((Linearlosses, TempLossLinear.reshape((10, 1))))\n",
    "    # Rewrite the results file after every class function\n",
    "    savetxt('Linearlosses.csv', Linearlosses, delimiter=\",\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Scatter plot: spread against mean of the fold scores, one point per class function\n",
    "for fold_scores, colour in ((NN3losses, \"blue\"), (Linearlosses, \"red\")):\n",
    "    # The mean of the stored scores is negated before it is plotted\n",
    "    plt.scatter(np.std(fold_scores, axis=0), -np.mean(fold_scores, axis=0), c=colour)\n",
    "plt.title(\"Symmetric groups class function learning performance\")\n",
    "plt.xlabel(\"Standard Deviation over 10 folds\")\n",
    "plt.ylabel(\"Mean over 10 folds of MSE\")\n",
    "# Hand-built legend entries: one round marker per model\n",
    "legend_elements = [\n",
    "    Line2D([0], [0], marker='o', color='w', label=name,\n",
    "           markerfacecolor=face, markersize=8)\n",
    "    for name, face in (('Neural Network', 'b'), ('Linear Regression', 'r'))\n",
    "]\n",
    "plt.legend(handles=legend_elements)\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Histogram of the mean MSE per class function for both models\n",
    "# The mean of the stored scores is negated before it is plotted\n",
    "nn_mean_mse = -np.mean(NN3losses, axis=0)\n",
    "linear_mean_mse = -np.mean(Linearlosses, axis=0)\n",
    "\n",
    "# Each histogram gets an explicit colour and label, and the legend is built from the\n",
    "# drawn artists, so the legend swatches always match the histograms they describe\n",
    "plt.hist(nn_mean_mse, histtype='stepfilled', alpha=0.3, bins=10, color='blue',\n",
    "         label='Neural Network mean  ' + str(round(nn_mean_mse.mean(), 6)))\n",
    "plt.hist(linear_mean_mse, histtype='stepfilled', alpha=0.3, bins=10, color='red',\n",
    "         label='Linear Regression mean  ' + str(round(linear_mean_mse.mean(), 6)))\n",
    "plt.title(\"Symmetric groups class function learning performance\")\n",
    "plt.xlabel(\"Mean MSE over 10 folds\")\n",
    "plt.ylabel(\"Frequency\")\n",
    "plt.legend()\n",
    "plt.show()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
