{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "executionInfo": {
     "elapsed": 1252,
     "status": "ok",
     "timestamp": 1599578618356,
     "user": {
      "displayName": "Shashi Kant",
      "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GiHv2woo7B1czb0XKsLBEOsgMWsuYITOryCdNNv7A=s64",
      "userId": "09783338171816410510"
     },
     "user_tz": -330
    },
    "id": "KVpizesFCowb"
   },
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import tensorflow as tf\n",
    "import scipy\n",
    "from tensorflow import keras\n",
    "from tensorflow.keras import layers\n",
    "\n",
    "from tensorflow.keras.applications.vgg16 import VGG16\n",
    "from keras.preprocessing.image import load_img\n",
    "from keras.preprocessing.image import img_to_array\n",
    "from keras.applications.vgg16 import preprocess_input\n",
    "\n",
    "import matplotlib.pyplot as plt\n",
    "from scipy import stats\n",
    "\n",
    "np.random.seed(0)\n",
    "tf.random.set_seed(0)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text",
    "id": "f4U93p9iqX_6"
   },
   "source": [
    "## Extracting features from VGG16"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "executionInfo": {
     "elapsed": 3503,
     "status": "ok",
     "timestamp": 1599578621593,
     "user": {
      "displayName": "Shashi Kant",
      "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GiHv2woo7B1czb0XKsLBEOsgMWsuYITOryCdNNv7A=s64",
      "userId": "09783338171816410510"
     },
     "user_tz": -330
    },
    "id": "1M4GZzmBk2XJ"
   },
   "outputs": [],
   "source": [
    "img_size = 224\n",
    "vgg16 = VGG16(weights='imagenet', include_top=True, pooling='max', input_shape = (img_size, img_size, 3))\n",
    "# vgg16.summary()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "executionInfo": {
     "elapsed": 3271,
     "status": "ok",
     "timestamp": 1599578621597,
     "user": {
      "displayName": "Shashi Kant",
      "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GiHv2woo7B1czb0XKsLBEOsgMWsuYITOryCdNNv7A=s64",
      "userId": "09783338171816410510"
     },
     "user_tz": -330
    },
    "id": "xFFpLJ4FNLY9"
   },
   "outputs": [],
   "source": [
    "op = vgg16.get_layer('fc2').output\n",
    "ip = vgg16.input\n",
    "\n",
    "basemodel = keras.Model(\n",
    "    inputs=ip,\n",
    "    outputs=op,\n",
    ")\n",
    "\n",
    "basemodel.trainable=False"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "executionInfo": {
     "elapsed": 13658,
     "status": "ok",
     "timestamp": 1599578632102,
     "user": {
      "displayName": "Shashi Kant",
      "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GiHv2woo7B1czb0XKsLBEOsgMWsuYITOryCdNNv7A=s64",
      "userId": "09783338171816410510"
     },
     "user_tz": -330
    },
    "id": "kQPMWv1pqmeP"
   },
   "outputs": [],
   "source": [
    "images = np.empty((50, img_size, img_size, 3))\n",
    "for i in range(50):\n",
    "  image = load_img(\"data/\" + str(i+1) +\".jpg\", target_size=(img_size, img_size))\n",
    "  image = img_to_array(image)\n",
    "\n",
    "  for j in range(img_size):\n",
    "    for k in range(img_size):\n",
    "      g = (image[j,k,0] + image[j,k,1] + image[j,k,2])/3\n",
    "      image[j,k,0] = g\n",
    "      image[j,k,1] = g\n",
    "      image[j,k,2] = g\n",
    "\n",
    "  images[i,:,:,:] = image\n",
    "\n",
    "images = preprocess_input(images)\n",
    "features = basemodel.predict(images)\n",
    "\n",
    "colors = np.empty((5, img_size, img_size, 3))\n",
    "for i in range(5):\n",
    "  image = load_img(\"data/C\" + str(i) + \".png\", target_size=(img_size, img_size))\n",
    "  image = img_to_array(image)\n",
    "  colors[i,:,:,:] = image\n",
    "\n",
    "colors = preprocess_input(colors)\n",
    "colorfeatures = basemodel.predict(colors)\n",
    "\n",
    "features = features.reshape(50,-1)\n",
    "colorfeatures = colorfeatures.reshape(5,-1)\n",
    "\n",
    "data = np.genfromtxt('data/bandw.csv', skip_header = 6, delimiter=',')\n",
    "num_sub = data.shape[0]\n",
    "humanpred = np.zeros((50,5))\n",
    "for i in range(50):\n",
    "  for j in range(num_sub):\n",
    "    color = int(data[j, i]);\n",
    "    humanpred[i, color] = humanpred[i, color]+1\n",
    "\n",
    "prob = humanpred/num_sub"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "executionInfo": {
     "elapsed": 13512,
     "status": "ok",
     "timestamp": 1599578632106,
     "user": {
      "displayName": "Shashi Kant",
      "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GiHv2woo7B1czb0XKsLBEOsgMWsuYITOryCdNNv7A=s64",
      "userId": "09783338171816410510"
     },
     "user_tz": -330
    },
    "id": "3o4bRkBQ2TwJ"
   },
   "outputs": [],
   "source": [
    "inputimages = np.zeros((250,features.shape[1]))\n",
    "inputcolors = np.zeros((250,features.shape[1]))\n",
    "outputprob = np.zeros((250))\n",
    "\n",
    "for i in range(50):\n",
    "  for j in range(5):\n",
    "    inputimages[5*i+j,:] = features[i,:]\n",
    "\n",
    "for i in range(5):\n",
    "  for j in range(50):\n",
    "    inputcolors[5*j+i] = colorfeatures[i, :]\n",
    "\n",
    "outputprob = prob.reshape(250)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text",
    "id": "Aqm4sK5LQRUs"
   },
   "source": [
    "## Model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "executionInfo": {
     "elapsed": 13132,
     "status": "ok",
     "timestamp": 1599578632107,
     "user": {
      "displayName": "Shashi Kant",
      "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GiHv2woo7B1czb0XKsLBEOsgMWsuYITOryCdNNv7A=s64",
      "userId": "09783338171816410510"
     },
     "user_tz": -330
    },
    "id": "ZRZR9zFuO9-2"
   },
   "outputs": [],
   "source": [
    "def get_model(op_features=200, color_transform=True):\n",
    "  imageinputs = keras.Input(shape=(features.shape[1]), name=\"imageinputs\")\n",
    "  colorinputs = keras.Input(shape=(features.shape[1]), name=\"colorinputs\")\n",
    "\n",
    "  dense = layers.Dense(op_features, name = \"linearlayer\", activation='relu')\n",
    "\n",
    "  imageresults = dense(imageinputs)\n",
    "  colorresults = dense(colorinputs)\n",
    "\n",
    "  if color_transform:\n",
    "    result = tf.keras.layers.Dot(axes=1, normalize=True, name=\"dot\")([imageresults, colorresults])\n",
    "  else:\n",
    "    result = tf.keras.layers.Dot(axes=1, normalize=True, name=\"dot\")([imageinputs, colorinputs])\n",
    "\n",
    "  model = keras.Model(\n",
    "      inputs=[imageinputs, colorinputs],\n",
    "      outputs=[result],\n",
    "  )\n",
    "\n",
    "  model.compile(loss = 'mean_squared_error', optimizer = tf.keras.optimizers.Adam(lr=0.001), metrics = ['mean_squared_error'])\n",
    "\n",
    "  return model\n",
    "\n",
    "def get_corr(output, outputprob, vs, ve, seq=[0, 1, 2, 3, 4]):\n",
    "  x = np.copy(output.reshape(50,5))[:, seq]\n",
    "\n",
    "  for i in range(50):\n",
    "    rowsum = np.sum(x[i,:])\n",
    "    for j in range(5):\n",
    "      x[i,j] = x[i,j]/rowsum\n",
    "\n",
    "  y = np.copy(outputprob.reshape(50,5))\n",
    "\n",
    "  testx = x.reshape(-1)[vs:ve]\n",
    "  testy = y.reshape(-1)[vs:ve]\n",
    "\n",
    "  return scipy.stats.pearsonr(testx, testy)[0], scipy.stats.pearsonr(testx, testy)[1], testx"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "executionInfo": {
     "elapsed": 12912,
     "status": "ok",
     "timestamp": 1599578632108,
     "user": {
      "displayName": "Shashi Kant",
      "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GiHv2woo7B1czb0XKsLBEOsgMWsuYITOryCdNNv7A=s64",
      "userId": "09783338171816410510"
     },
     "user_tz": -330
    },
    "id": "CS-lgLMbKUZY"
   },
   "outputs": [],
   "source": [
    "# First one is mapping to original color. Others are for checks. \n",
    "# Note that we selected these sequence such that none of them align with original labels.\n",
    "\n",
    "seqs =  [[0, 1, 2, 3, 4],\n",
    "         [4, 3, 0, 2, 1],\n",
    "          [4, 2, 0, 1, 3],\n",
    "          [3, 4, 0, 2, 1],\n",
    "          [3, 0, 1, 4, 2],\n",
    "          [2, 3, 1, 4, 0],\n",
    "          [2, 4, 3, 1, 0],\n",
    "          [1, 0, 4, 2, 3],\n",
    "          [1, 2, 3, 4, 0]]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 170
    },
    "colab_type": "code",
    "executionInfo": {
     "elapsed": 17630,
     "status": "ok",
     "timestamp": 1599578637061,
     "user": {
      "displayName": "Shashi Kant",
      "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GiHv2woo7B1czb0XKsLBEOsgMWsuYITOryCdNNv7A=s64",
      "userId": "09783338171816410510"
     },
     "user_tz": -330
    },
    "id": "93pqZwcFA32k",
    "outputId": "cccd8f7f-69e6-45b3-f9f7-30dc30585d8a"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Seq: [0, 1, 2, 3, 4] - Overall R: 0.33 and p-value: 0.0\n",
      "Seq: [4, 3, 0, 2, 1] - Overall R: 0.01 and p-value: 0.929826\n",
      "Seq: [4, 2, 0, 1, 3] - Overall R: -0.13 and p-value: 0.044568\n",
      "Seq: [3, 4, 0, 2, 1] - Overall R: -0.01 and p-value: 0.890461\n",
      "Seq: [3, 0, 1, 4, 2] - Overall R: -0.1 and p-value: 0.10963\n",
      "Seq: [2, 3, 1, 4, 0] - Overall R: -0.09 and p-value: 0.168306\n",
      "Seq: [2, 4, 3, 1, 0] - Overall R: 0.11 and p-value: 0.078558\n",
      "Seq: [1, 0, 4, 2, 3] - Overall R: 0.06 and p-value: 0.375242\n",
      "Seq: [1, 2, 3, 4, 0] - Overall R: -0.01 and p-value: 0.863059\n"
     ]
    }
   ],
   "source": [
    "# Raw\n",
    "\n",
    "for seq in seqs:\n",
    "  x = np.copy(inputcolors.reshape(50, 5, -1))[:, seq, :]\n",
    "  x = x.reshape((250, -1))\n",
    "\n",
    "  corr = [] #This contains correlation corresponding to specific validation data\n",
    "\n",
    "  pred = np.zeros(250) #This will contain the final prediction for each of the loop. Total 5 subset of validation 50 each.\n",
    "\n",
    "  # 5-fold cross validation \n",
    "  for vs in [0, 50, 100, 150, 200]:\n",
    "    ve = vs + 50\n",
    "\n",
    "    model = get_model(op_features=75, color_transform=False)\n",
    "\n",
    "    output = model.predict({\"imageinputs\": inputimages, \"colorinputs\": x})\n",
    "\n",
    "    R, _, pred[vs:ve] = get_corr(output, outputprob, vs, ve)\n",
    "\n",
    "    corr.append(R)\n",
    "\n",
    "  print(\"Seq:\", seq, \"- Overall R:\", round(get_corr(pred, outputprob, 0, 250)[0], 2), \"and p-value:\", round(get_corr(pred, outputprob, 0, 250)[1], 6))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 170
    },
    "colab_type": "code",
    "executionInfo": {
     "elapsed": 83456,
     "status": "ok",
     "timestamp": 1599578703112,
     "user": {
      "displayName": "Shashi Kant",
      "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GiHv2woo7B1czb0XKsLBEOsgMWsuYITOryCdNNv7A=s64",
      "userId": "09783338171816410510"
     },
     "user_tz": -330
    },
    "id": "ygMZBSStrMBV",
    "outputId": "64ed691d-41ff-4095-9c85-8021882f2e91"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Seq: [0, 1, 2, 3, 4] - Overall R: 0.64 and p-value: 0.0\n",
      "Seq: [4, 3, 0, 2, 1] - Overall R: 0.04 and p-value: 0.56141\n",
      "Seq: [4, 2, 0, 1, 3] - Overall R: -0.32 and p-value: 0.0\n",
      "Seq: [3, 4, 0, 2, 1] - Overall R: 0.04 and p-value: 0.538466\n",
      "Seq: [3, 0, 1, 4, 2] - Overall R: -0.35 and p-value: 0.0\n",
      "Seq: [2, 3, 1, 4, 0] - Overall R: -0.34 and p-value: 0.0\n",
      "Seq: [2, 4, 3, 1, 0] - Overall R: 0.05 and p-value: 0.46052\n",
      "Seq: [1, 0, 4, 2, 3] - Overall R: -0.03 and p-value: 0.693661\n",
      "Seq: [1, 2, 3, 4, 0] - Overall R: -0.01 and p-value: 0.92984\n"
     ]
    }
   ],
   "source": [
    "# Transformed\n",
    "\n",
    "for seq in seqs:\n",
    "  x = np.copy(inputcolors.reshape(50, 5, -1))[:, seq, :]\n",
    "  x = x.reshape((250, -1))\n",
    "\n",
    "  corr = [] #This contains correlation corresponding to specific validation data\n",
    "\n",
    "  pred = np.zeros(250) #This will contain the final prediction for each of the loop. Total 5 subset of validation 50 each.\n",
    "\n",
    "  # 5-fold cross validation \n",
    "  for vs in [0, 50, 100, 150, 200]:\n",
    "    ve = vs + 50\n",
    "\n",
    "    model = get_model(op_features=75)\n",
    "\n",
    "    history = model.fit(\n",
    "        {\"imageinputs\": inputimages[[*range(vs)] + [*range(ve, 250)]], \"colorinputs\": inputcolors[[*range(vs)] + [*range(ve, 250)]]},\n",
    "        {\"dot\": outputprob[[*range(vs)] + [*range(ve, 250)]]},\n",
    "        epochs=30,\n",
    "        batch_size=10,\n",
    "        shuffle=True,\n",
    "        verbose=0,\n",
    "    )\n",
    "\n",
    "    output = model.predict({\"imageinputs\": inputimages, \"colorinputs\": x})\n",
    "\n",
    "    R, _, pred[vs:ve] = get_corr(output, outputprob, vs, ve)\n",
    "\n",
    "    corr.append(R)\n",
    "\n",
    "  print(\"Seq:\", seq, \"- Overall R:\", round(get_corr(pred, outputprob, 0, 250)[0], 2), \"and p-value:\", round(get_corr(pred, outputprob, 0, 250)[1], 6))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "jyxFaGqaB8oW"
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "accelerator": "GPU",
  "colab": {
   "collapsed_sections": [],
   "name": "With_Wrong_Class.ipynb",
   "provenance": [
    {
     "file_id": "1UE_yDN7vICMTyp3wv4qOQxfoZYEpAE8f",
     "timestamp": 1595256575799
    }
   ]
  },
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
