{
 "cells": [
  {
   "cell_type": "markdown",
   "source": [
    "**This notebook creates De-Randomized Smoothed MalConv models for different ablation sizes and evaluates them.**"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "import numpy as np\n",
    "import torch\n",
    "from secml.array import CArray\n",
    "from sklearn.metrics import accuracy_score\n",
    "\n",
    "from secml_malware.smoothed_malconv import get_dataset, create_smoothed_malconv, modify_dataset_for_smoothed_malconv, train_model, model_predict, get_majority_voting"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "outputs": [],
   "source": [
    "# Dataset location. Set the MALCONV_DATA_DIR environment variable to a directory that\n",
    "# contains `malware/` and `benign/` sub-folders; when it is unset, the original\n",
    "# author's local dataset directory is used so existing runs behave as before.\n",
    "# Alternative sample folders previously used by the author:\n",
    "#mal_path = \"secml_malware/data/malware_samples/test_folder\"\n",
    "#ben_path = \"secml_malware/data/goodware_samples/my_samples\"\n",
    "data_root = os.environ.get(\n",
    "    \"MALCONV_DATA_DIR\",\n",
    "    \"/Users/shoumik/Desktop/UMD 1st semester/Computer & Network Security/Project/Codes/MalConv-New/FullDataset/output\",\n",
    ")\n",
    "mal_path = os.path.join(data_root, \"malware\")\n",
    "ben_path = os.path.join(data_root, \"benign\")\n",
    "\n",
    "# max_length is passed to get_dataset (presumably the per-sample input length in\n",
    "# bytes - confirm against secml_malware.smoothed_malconv).\n",
    "max_length = 2**20\n",
    "num_ablations = 20\n",
    "# Floor division: with these values each ablation is 52428 long, so the trailing\n",
    "# max_length % num_ablations (= 16) positions do not fill a complete ablation.\n",
    "ablation_size = max_length // num_ablations"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "100\n",
      "(100, 20, 52428)\n",
      "(100, 20)\n",
      "[[1. 1. 1. ... 1. 1. 1.]\n",
      " [1. 1. 1. ... 1. 1. 1.]\n",
      " [1. 1. 1. ... 1. 1. 1.]\n",
      " ...\n",
      " [0. 0. 0. ... 0. 0. 0.]\n",
      " [0. 0. 0. ... 0. 0. 0.]\n",
      " [0. 0. 0. ... 0. 0. 0.]]\n"
     ]
    }
   ],
   "source": [
    "# Load the malware and benign samples. The trailing 50 looks like a per-class\n",
    "# sample cap (100 samples are loaded in total) - confirm against get_dataset.\n",
    "X, y, file_names = get_dataset(mal_path, ben_path, max_length, 50)\n",
    "print(len(X))\n",
    "\n",
    "# Flatten the labels to 1-D, then split every sample into num_ablations pieces;\n",
    "# the labels come back with one column per ablation.\n",
    "y = np.reshape(y, -1)\n",
    "new_X, new_y = modify_dataset_for_smoothed_malconv(X, y, num_ablations)\n",
    "print(new_X.shape, new_y.shape, new_y, sep=\"\\n\")"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "outputs": [],
   "source": [
    "# Convert the ablated samples and their labels to float tensors for training.\n",
    "tensor_X, tensor_y = (torch.FloatTensor(arr) for arr in (new_X, new_y))"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "outputs": [],
   "source": [
    "# Build the smoothed MalConv models (presumably one model per ablation, each taking\n",
    "# inputs of length ablation_size - confirm in secml_malware.smoothed_malconv).\n",
    "# Uncomment the print below to inspect what was created.\n",
    "smoothed_malconv_models = create_smoothed_malconv(\n",
    "    num_ablations,\n",
    "    ablation_size,\n",
    ")\n",
    "#print(smoothed_malconv_models)"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "outputs": [],
   "source": [
    "# Train the models on the ablated tensors for 5 epochs. The result is bound back to\n",
    "# the same name, so re-running this cell continues from whatever train_model returned\n",
    "# last time rather than from freshly created models.\n",
    "smoothed_malconv_models = train_model(\n",
    "    smoothed_malconv_models,\n",
    "    num_ablations,\n",
    "    tensor_X,\n",
    "    tensor_y,\n",
    "    epochs=5,\n",
    ")"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[1 1 1 ... 0 1 0]\n",
      " [1 1 1 ... 0 1 1]\n",
      " [1 1 1 ... 0 1 1]\n",
      " ...\n",
      " [1 1 1 ... 1 1 1]\n",
      " [1 1 1 ... 1 1 1]\n",
      " [1 1 1 ... 1 1 1]]\n"
     ]
    }
   ],
   "source": [
    "# Predict with the trained models on the same tensors they were trained on.\n",
    "# Debug aid for checking a single model on its own ablation slice:\n",
    "#print(smoothed_malconv_models[0]._model.predict(tensor_X[:, 0, :]))\n",
    "y_preds_2d = model_predict(\n",
    "    smoothed_malconv_models, num_ablations, tensor_X\n",
    ")\n",
    "print(y_preds_2d)"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]\n"
     ]
    }
   ],
   "source": [
    "# Reduce the per-ablation predictions to a single label per sample by majority vote;\n",
    "# the second argument is the number of samples (rows of new_y).\n",
    "votes = get_majority_voting(y_preds_2d, len(new_y))\n",
    "print(votes)"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.52\n"
     ]
    }
   ],
   "source": [
    "# Accuracy of the majority-vote labels against the true labels.\n",
    "# NOTE: tensor_X was used for both training and prediction, so this is training-set\n",
    "# accuracy rather than a held-out estimate.\n",
    "print(accuracy_score(y, votes))"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "markdown",
   "source": [],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [],
   "metadata": {
    "collapsed": false
   }
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
