{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Sample from an orthogonal matrix of N dimension, 2k vectors and train a classifier, train classifier, note epsilon\n",
    "from scipy.stats import ortho_group\n",
    "import numpy as np\n",
    "import scipy.linalg\n",
    "from models import NeuralNet, CNN_OneD\n",
    "import torch\n",
    "from torch import nn\n",
    "from advertorch.attacks import LinfPGDAttack, L2PGDAttack, DDNL2Attack\n",
    "import ipdb\n",
    "import itertools"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# input_shape = 100\n",
    "# x = ortho_group.rvs(input_shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "def train(model, data, labels, epochs = 1000):\n",
    "    model.train()\n",
    "    criterion = nn.CrossEntropyLoss()\n",
    "    optimizer = torch.optim.Adam(model.parameters(), lr=1e-2)\n",
    "    batch_size = data.shape[0]\n",
    "    for epoch in range(epochs):\n",
    "        if epoch==epochs//2:\n",
    "            for param_group in optimizer.param_groups:\n",
    "                param_group['lr'] /=10\n",
    "        correct = 0\n",
    "        total_loss = 0\n",
    "#         ipdb.set_trace()\n",
    "        for idx in range(data.shape[0]//batch_size):\n",
    "            batch_data, batch_labels = data[idx*batch_size:(idx+1)*batch_size], labels[idx*batch_size:(idx+1)*batch_size]\n",
    "            out = model(batch_data)\n",
    "            loss = criterion(out, batch_labels)\n",
    "            optimizer.zero_grad()\n",
    "            loss.backward()\n",
    "            optimizer.step()\n",
    "#             ipdb.set_trace()\n",
    "            pred = out.argmax(1)\n",
    "            correct += (pred == batch_labels).sum().item()\n",
    "            total_loss += loss.item()*batch_size\n",
    "#         if epoch%(epochs/5)==0:\n",
    "#             print(\"Epoch: {}, Accuracy: {:.2f}, Loss: {:.5f}\".format(epoch, 100*correct/data.shape[0], total_loss/data.shape[0]))\n",
    "    return model\n",
    "\n",
    "def test_pgd(model, data, labels, epsilon, res_row):\n",
    "    model.eval()\n",
    "    criterion = nn.CrossEntropyLoss(reduction='sum')\n",
    "    adversary = L2PGDAttack(model, loss_fn=criterion, nb_iter=100, eps_iter=epsilon/50,\n",
    "                                rand_init=True, eps=epsilon, clip_min=data.min().item(), clip_max=data.max().item(), targeted=False)\n",
    "    # Set requires_grad attribute of tensor. Important for Attack\n",
    "    data.requires_grad = True\n",
    "#     ipdb.set_trace()\n",
    "    perturbed_data = adversary.perturb(data, labels)\n",
    "    new_out = model(perturbed_data)\n",
    "#     ipdb.set_trace()\n",
    "    pred = new_out.argmax(1)\n",
    "    correct = (pred==labels).sum()\n",
    "    res_row.append(100*float(correct)/labels.shape[0])\n",
    "#     print(\"Epsilon: {}, Accuracy: {}\".format(epsilon, 100*float(correct)/labels.shape[0]))\n",
    "    \n",
    "def test_ddn(model, data, labels, res_row):\n",
    "    model.eval()\n",
    "    criterion = nn.CrossEntropyLoss(reduction='sum')\n",
    "    adversary = DDNL2Attack(model, nb_iter=1000, quantize = False, clip_min=data.min().item(), clip_max=data.max().item(),\n",
    "                           loss_fn=criterion,)\n",
    "#     adversary = L2PGDAttack(model, loss_fn=nn.MSELoss(reduction=\"sum\"), nb_iter=40, eps_iter=epsilon/20,\n",
    "#                                 rand_init=True, eps=epsilon, clip_min=data.min().item(), clip_max=data.max().item(), targeted=False)\n",
    "    # Set requires_grad attribute of tensor. Important for Attack\n",
    "    data.requires_grad = True\n",
    "#     ipdb.set_trace()\n",
    "    perturbed_data = adversary.perturb(data, labels)\n",
    "    new_out = model(perturbed_data)\n",
    "    pred = new_out.argmax(1)\n",
    "    correct = (pred==labels).sum()\n",
    "#     ipdb.set_trace()\n",
    "    l2_distances = np.linalg.norm((data - perturbed_data).detach().cpu().numpy(),\n",
    "                                  ord=2, axis=1)\n",
    "    res_row.append(np.mean(l2_distances))\n",
    "    res_row.append(np.median(l2_distances))\n",
    "#     print(\"Samples: {}, Median L2: {},\"\n",
    "#           \"Mean L2: {}, Accuracy: {}\".format(l2_distances.shape[0], l2_distances.median(),\n",
    "#                                              l2_distances.mean(), 100*float(correct)/labels.shape[0]))\n",
    "  "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "def add_linear_seperator(d1, d2, p):\n",
    "    z_1 = np.mean(d1, axis=0)\n",
    "    z_2 = np.mean(d2, axis=0)\n",
    "    z_1 /= np.linalg.norm(z_1)\n",
    "    z_2 /= np.linalg.norm(z_2)\n",
    "    \n",
    "    d1_n = z_1*p + d1*(1-p)\n",
    "    d2_n = z_2*p + d2*(1-p)\n",
    "    d1_n /= np.expand_dims(np.linalg.norm(d1_n, axis=1), 1)\n",
    "    d2_n /= np.expand_dims(np.linalg.norm(d2_n, axis=1), 1)\n",
    "    return d1_n, d2_n\n",
    "\n",
    "def sample_orth_datasets(dimension, samples, p):\n",
    "    if p < 0:\n",
    "        d1, d2 = x[:k], x[dimension//2:dimension//2+samples]\n",
    "    else:\n",
    "        d1, d2 = x[:k], x[dimension//2:dimension//2+samples]\n",
    "        z1, z2 = x[k-1], x[dimension//2+samples-1]\n",
    "#         d1 = d1*(1-p) + z1*p\n",
    "#         d2 = d2*(1-p) + z2*p\n",
    "        d1 = d1 + z1*p\n",
    "        d2 = d2 + z2*p\n",
    "#         d1 /= np.expand_dims(np.linalg.norm(d1, axis=1), 1)\n",
    "#         d2 /= np.expand_dims(np.linalg.norm(d2, axis=1), 1)\n",
    "    return d1, d2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "4000.00000,50.00000,-1.00000,0.02147,0.02058\n",
      "-----------------------\n",
      "4000.00000,100.00000,-1.00000,0.01843,0.01777\n",
      "-----------------------\n",
      "4000.00000,200.00000,-1.00000,0.01619,0.01500\n",
      "-----------------------\n"
     ]
    }
   ],
   "source": [
    "# for k in [10, 20, 50, 100, 500, 999, 1000]:\n",
    "dimensions = [4000]\n",
    "# p_values = [-1, 0.01, 0.02, 0.05, 0.1, 0.2, 0.3]\n",
    "p_values = [-1]\n",
    "# samples = [50, 100, 250, 500, 1000]\n",
    "samples = [50, 100, 200]\n",
    "# samples = [50]\n",
    "# dimensions = [1000]\n",
    "# p_values = [-1]\n",
    "# samples = [500]\n",
    "# cols = ['Input Shape, Samples, p, ']\n",
    "data_all = []\n",
    "\n",
    "for input_shape, p  in itertools.product(dimensions, p_values):\n",
    "    x = ortho_group.rvs(input_shape)\n",
    "#     print(\"###################\")\n",
    "#     print(\"###################\")\n",
    "#     print(\"###################\")\n",
    "    for k in samples:\n",
    "        res_row = []\n",
    "#         if 2*k != input_shape:\n",
    "#             continue\n",
    "        d1, d2 = sample_orth_datasets(input_shape, k, p)\n",
    "#         d1, d2 = x[:k],x[input_shape//2:input_shape//2+k]\n",
    "#         d1, d2 = add_linear_seperator(d1, d2, p)\n",
    "#         d1, d2 = x[:k], -x[:k]\n",
    "        data = torch.tensor(np.concatenate((d1, d2))).float().cuda()\n",
    "        labels = torch.tensor(np.concatenate(([0]*d1.shape[0], [1]*d2.shape[0]))).long().cuda()\n",
    "        idx_random = np.random.permutation(data.shape[0])\n",
    "        data, labels = data[idx_random], labels[idx_random]\n",
    "#         print(\"{}, {}, {}\".format(input_shape, k, p))\n",
    "        res_row.append(input_shape)\n",
    "        res_row.append(k)\n",
    "        res_row.append(p)\n",
    "#         model = NeuralNet(input_shape, 1, 100, 2, no_final=True)\n",
    "        model = CNN_OneD(hidden_channels=128, kernel_size=1000,\n",
    "                         num_classes = 2, no_final=True)\n",
    "        model = model.cuda()\n",
    "        model = train(model, data, labels, 1000)\n",
    "#         print(\"-----------------------\")\n",
    "#         print(\"-----------------------\")\n",
    "#         for epsilon in [0.001, 0.005, 0.01, 0.05, 0.1, 0.15, 0.2, 0.25, 0.3]:\n",
    "#     #     for epsilon in [0.1, 0.12, 0.15, 0.18, 0.2, 0.3, 0.4, 0.5]:\n",
    "#             test_pgd(model, data, labels, epsilon, res_row)\n",
    "        test_ddn(model, data, labels, res_row)\n",
    "        data_all.append(res_row)\n",
    "        print(','.join(['{:.5f}'.format(x) for x in res_row]))\n",
    "    \n",
    "        print(\"-----------------------\")\n",
    "#         print(\"-----------------------\")\n",
    "#     print(\"###################\")\n",
    "#     print(\"###################\")\n",
    "#     print(\"###################\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "p = 0.001, 0.005, 0.01, 0.02, 0.05, 0.1, 0.2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "# data_all[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pickle as pkl"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "with open('results/fcn_orthogonal_data.pkl', 'wb') as f:\n",
    "    pkl.dump(data_all, f)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "# print(res_row)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "# d1[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "# np.concatenate(([-1]*d1.shape[0], [1]*d2.shape[0]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "z = np.mean(x[:10], axis=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "z_1 = np.mean(x[:50], axis = 0)\n",
    "norm_1 = np.linalg.norm(z_1)\n",
    "z_2 = np.mean(x[50:], axis=0)\n",
    "norm_2 = np.linalg.norm(z_2)\n",
    "z_1 /= norm_1\n",
    "z_2 /= norm_2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "x_n1 = x[:50]*0.96 + z_1*0.04"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(50, 1)"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.expand_dims(np.linalg.norm(x_n1, axis=1), 1).shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "ename": "NameError",
     "evalue": "name 'x_normed1' is not defined",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mNameError\u001b[0m                                 Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-16-f13856de3218>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlinalg\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnorm\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx_normed1\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
      "\u001b[0;31mNameError\u001b[0m: name 'x_normed1' is not defined"
     ]
    }
   ],
   "source": [
    "np.linalg.norm(x_normed1[0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "np.sum(x_normed1[0]*x_normed1[3])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
