{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "7c88cc1a-bc81-4993-8d32-9590bb72c662",
   "metadata": {},
   "source": [
    "## DataPreprocessing-1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "c8572d82-c801-4b82-9948-1bbee704cd59",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Filename</th>\n",
       "      <th>target</th>\n",
       "      <th>Total System Calls</th>\n",
       "      <th>Unique System Calls</th>\n",
       "      <th>Unique System Calls List</th>\n",
       "      <th>Total Errors</th>\n",
       "      <th>Unique Errors</th>\n",
       "      <th>Unique Errors List</th>\n",
       "      <th>File Operations</th>\n",
       "      <th>Unique File Operations</th>\n",
       "      <th>...</th>\n",
       "      <th>Pattern 1</th>\n",
       "      <th>Pattern 2</th>\n",
       "      <th>Pattern 3</th>\n",
       "      <th>Pattern 4</th>\n",
       "      <th>Pattern 5</th>\n",
       "      <th>Advanced Pattern 1</th>\n",
       "      <th>Advanced Pattern 2</th>\n",
       "      <th>Advanced Pattern 3</th>\n",
       "      <th>Advanced Pattern 4</th>\n",
       "      <th>Advanced Pattern 5</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>strace_output_18086.18086</td>\n",
       "      <td>0</td>\n",
       "      <td>25442</td>\n",
       "      <td>39</td>\n",
       "      <td>brk, newfstatat, openat, fstat, ioctl, lseek, ...</td>\n",
       "      <td>2821</td>\n",
       "      <td>6</td>\n",
       "      <td>ENOTTY, ENOENT, ENOTDIR, EACCES, ESPIPE, EEXIST</td>\n",
       "      <td>19892</td>\n",
       "      <td>8</td>\n",
       "      <td>...</td>\n",
       "      <td>newfstatat -&gt; newfstatat -&gt; newfstatat</td>\n",
       "      <td>fstat -&gt; ioctl -&gt; lseek</td>\n",
       "      <td>openat -&gt; fstat -&gt; ioctl</td>\n",
       "      <td>ioctl -&gt; lseek -&gt; lseek</td>\n",
       "      <td>lseek -&gt; lseek -&gt; fstat</td>\n",
       "      <td>openat -&gt; fstat -&gt; ioctl -&gt; no-error -&gt; no-fd</td>\n",
       "      <td>read -&gt; read -&gt; close -&gt; no-error -&gt; no-fd</td>\n",
       "      <td>fstat -&gt; ioctl -&gt; lseek -&gt; no-error -&gt; no-fd</td>\n",
       "      <td>lseek -&gt; lseek -&gt; fstat -&gt; no-error -&gt; no-fd</td>\n",
       "      <td>newfstatat -&gt; newfstatat -&gt; newfstatat -&gt; no-e...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>strace_output_18086.18127</td>\n",
       "      <td>0</td>\n",
       "      <td>186</td>\n",
       "      <td>34</td>\n",
       "      <td>close, dup3, rt_sigaction, rt_sigprocmask, clo...</td>\n",
       "      <td>11</td>\n",
       "      <td>3</td>\n",
       "      <td>ENOENT, ECHILD, ENOTTY</td>\n",
       "      <td>46</td>\n",
       "      <td>6</td>\n",
       "      <td>...</td>\n",
       "      <td>rt_sigaction -&gt; rt_sigaction -&gt; rt_sigaction</td>\n",
       "      <td>pipe2 -&gt; clone -&gt; close</td>\n",
       "      <td>clone -&gt; close -&gt; read</td>\n",
       "      <td>close -&gt; read -&gt; read</td>\n",
       "      <td>read -&gt; read -&gt; rt_sigreturn</td>\n",
       "      <td>rt_sigaction -&gt; rt_sigaction -&gt; rt_sigaction -...</td>\n",
       "      <td>pipe2 -&gt; clone -&gt; close -&gt; no-error -&gt; no-fd</td>\n",
       "      <td>read -&gt; rt_sigreturn -&gt; close -&gt; no-error -&gt; n...</td>\n",
       "      <td>rt_sigreturn -&gt; close -&gt; wait4 -&gt; no-error -&gt; ...</td>\n",
       "      <td>close -&gt; wait4 -&gt; wait4 -&gt; no-error -&gt; no-fd</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>strace_output_18086.18128</td>\n",
       "      <td>0</td>\n",
       "      <td>124</td>\n",
       "      <td>21</td>\n",
       "      <td>set_robust_list, close, dup3, newfstatat, exec...</td>\n",
       "      <td>17</td>\n",
       "      <td>1</td>\n",
       "      <td>ENOENT</td>\n",
       "      <td>83</td>\n",
       "      <td>7</td>\n",
       "      <td>...</td>\n",
       "      <td>openat -&gt; fstat -&gt; mmap</td>\n",
       "      <td>fstat -&gt; mmap -&gt; close</td>\n",
       "      <td>mmap -&gt; close -&gt; openat</td>\n",
       "      <td>openat -&gt; openat -&gt; fstat</td>\n",
       "      <td>close -&gt; openat -&gt; openat</td>\n",
       "      <td>openat -&gt; fstat -&gt; mmap -&gt; no-error -&gt; no-fd</td>\n",
       "      <td>fstat -&gt; mmap -&gt; close -&gt; no-error -&gt; no-fd</td>\n",
       "      <td>mmap -&gt; close -&gt; openat -&gt; no-error -&gt; no-fd</td>\n",
       "      <td>openat -&gt; openat -&gt; fstat -&gt; error=ENOENT -&gt; n...</td>\n",
       "      <td>close -&gt; openat -&gt; openat -&gt; no-error -&gt; no-fd</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>strace_output_18086.18129</td>\n",
       "      <td>0</td>\n",
       "      <td>33</td>\n",
       "      <td>9</td>\n",
       "      <td>set_robust_list, close, dup3, pipe2, clone, ne...</td>\n",
       "      <td>10</td>\n",
       "      <td>3</td>\n",
       "      <td>ENOENT, EBADF, ECHILD</td>\n",
       "      <td>18</td>\n",
       "      <td>2</td>\n",
       "      <td>...</td>\n",
       "      <td>newfstatat -&gt; newfstatat -&gt; newfstatat</td>\n",
       "      <td>close -&gt; newfstatat -&gt; newfstatat</td>\n",
       "      <td>clone -&gt; close -&gt; close</td>\n",
       "      <td>wait4 -&gt; rt_sigreturn -&gt; wait4</td>\n",
       "      <td>set_robust_list -&gt; close -&gt; close</td>\n",
       "      <td>newfstatat -&gt; newfstatat -&gt; newfstatat -&gt; erro...</td>\n",
       "      <td>close -&gt; newfstatat -&gt; newfstatat -&gt; no-error ...</td>\n",
       "      <td>clone -&gt; close -&gt; close -&gt; no-error -&gt; no-fd</td>\n",
       "      <td>set_robust_list -&gt; close -&gt; close -&gt; no-error ...</td>\n",
       "      <td>close -&gt; close -&gt; dup3 -&gt; no-error -&gt; no-fd</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>strace_output_18086.18130</td>\n",
       "      <td>0</td>\n",
       "      <td>6</td>\n",
       "      <td>5</td>\n",
       "      <td>set_robust_list, close, dup3, write, exit_group</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>...</td>\n",
       "      <td>set_robust_list -&gt; close -&gt; dup3</td>\n",
       "      <td>close -&gt; dup3 -&gt; close</td>\n",
       "      <td>dup3 -&gt; close -&gt; write</td>\n",
       "      <td>close -&gt; write -&gt; exit_group</td>\n",
       "      <td>NaN</td>\n",
       "      <td>set_robust_list -&gt; close -&gt; dup3 -&gt; no-error -...</td>\n",
       "      <td>close -&gt; dup3 -&gt; close -&gt; no-error -&gt; no-fd</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 148 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                    Filename  target  Total System Calls  Unique System Calls  \\\n",
       "0  strace_output_18086.18086       0               25442                   39   \n",
       "1  strace_output_18086.18127       0                 186                   34   \n",
       "2  strace_output_18086.18128       0                 124                   21   \n",
       "3  strace_output_18086.18129       0                  33                    9   \n",
       "4  strace_output_18086.18130       0                   6                    5   \n",
       "\n",
       "                            Unique System Calls List  Total Errors  \\\n",
       "0  brk, newfstatat, openat, fstat, ioctl, lseek, ...          2821   \n",
       "1  close, dup3, rt_sigaction, rt_sigprocmask, clo...            11   \n",
       "2  set_robust_list, close, dup3, newfstatat, exec...            17   \n",
       "3  set_robust_list, close, dup3, pipe2, clone, ne...            10   \n",
       "4    set_robust_list, close, dup3, write, exit_group             0   \n",
       "\n",
       "   Unique Errors                               Unique Errors List  \\\n",
       "0              6  ENOTTY, ENOENT, ENOTDIR, EACCES, ESPIPE, EEXIST   \n",
       "1              3                           ENOENT, ECHILD, ENOTTY   \n",
       "2              1                                           ENOENT   \n",
       "3              3                            ENOENT, EBADF, ECHILD   \n",
       "4              0                                              NaN   \n",
       "\n",
       "   File Operations  Unique File Operations  ...  \\\n",
       "0            19892                       8  ...   \n",
       "1               46                       6  ...   \n",
       "2               83                       7  ...   \n",
       "3               18                       2  ...   \n",
       "4                3                       2  ...   \n",
       "\n",
       "                                      Pattern 1  \\\n",
       "0        newfstatat -> newfstatat -> newfstatat   \n",
       "1  rt_sigaction -> rt_sigaction -> rt_sigaction   \n",
       "2                       openat -> fstat -> mmap   \n",
       "3        newfstatat -> newfstatat -> newfstatat   \n",
       "4              set_robust_list -> close -> dup3   \n",
       "\n",
       "                           Pattern 2                 Pattern 3  \\\n",
       "0            fstat -> ioctl -> lseek  openat -> fstat -> ioctl   \n",
       "1            pipe2 -> clone -> close    clone -> close -> read   \n",
       "2             fstat -> mmap -> close   mmap -> close -> openat   \n",
       "3  close -> newfstatat -> newfstatat   clone -> close -> close   \n",
       "4             close -> dup3 -> close    dup3 -> close -> write   \n",
       "\n",
       "                        Pattern 4                          Pattern 5  \\\n",
       "0         ioctl -> lseek -> lseek            lseek -> lseek -> fstat   \n",
       "1           close -> read -> read       read -> read -> rt_sigreturn   \n",
       "2       openat -> openat -> fstat          close -> openat -> openat   \n",
       "3  wait4 -> rt_sigreturn -> wait4  set_robust_list -> close -> close   \n",
       "4    close -> write -> exit_group                                NaN   \n",
       "\n",
       "                                  Advanced Pattern 1  \\\n",
       "0      openat -> fstat -> ioctl -> no-error -> no-fd   \n",
       "1  rt_sigaction -> rt_sigaction -> rt_sigaction -...   \n",
       "2       openat -> fstat -> mmap -> no-error -> no-fd   \n",
       "3  newfstatat -> newfstatat -> newfstatat -> erro...   \n",
       "4  set_robust_list -> close -> dup3 -> no-error -...   \n",
       "\n",
       "                                  Advanced Pattern 2  \\\n",
       "0         read -> read -> close -> no-error -> no-fd   \n",
       "1       pipe2 -> clone -> close -> no-error -> no-fd   \n",
       "2        fstat -> mmap -> close -> no-error -> no-fd   \n",
       "3  close -> newfstatat -> newfstatat -> no-error ...   \n",
       "4        close -> dup3 -> close -> no-error -> no-fd   \n",
       "\n",
       "                                  Advanced Pattern 3  \\\n",
       "0       fstat -> ioctl -> lseek -> no-error -> no-fd   \n",
       "1  read -> rt_sigreturn -> close -> no-error -> n...   \n",
       "2       mmap -> close -> openat -> no-error -> no-fd   \n",
       "3       clone -> close -> close -> no-error -> no-fd   \n",
       "4                                                NaN   \n",
       "\n",
       "                                  Advanced Pattern 4  \\\n",
       "0       lseek -> lseek -> fstat -> no-error -> no-fd   \n",
       "1  rt_sigreturn -> close -> wait4 -> no-error -> ...   \n",
       "2  openat -> openat -> fstat -> error=ENOENT -> n...   \n",
       "3  set_robust_list -> close -> close -> no-error ...   \n",
       "4                                                NaN   \n",
       "\n",
       "                                  Advanced Pattern 5  \n",
       "0  newfstatat -> newfstatat -> newfstatat -> no-e...  \n",
       "1       close -> wait4 -> wait4 -> no-error -> no-fd  \n",
       "2     close -> openat -> openat -> no-error -> no-fd  \n",
       "3        close -> close -> dup3 -> no-error -> no-fd  \n",
       "4                                                NaN  \n",
       "\n",
       "[5 rows x 148 columns]"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "\n",
    "# Table of per-trace statistics; the path is resolved relative to the working directory.\n",
    "file_path = 'trace_statistics_with_advanced_patterns.csv'\n",
    "\n",
    "df = pd.read_csv(filepath_or_buffer=file_path)\n",
    "\n",
    "# Preview the first five rows.\n",
    "df.head(n=5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "id": "8c456482-4c76-4d54-8d99-894eadad1cd6",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\N11894571\\AppData\\Local\\Temp\\ipykernel_30040\\600274108.py:2: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value 'Unknown' has dtype incompatible with float64, please explicitly cast to a compatible dtype first.\n",
      "  df.fillna('Unknown', inplace=True)\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Filename</th>\n",
       "      <th>target</th>\n",
       "      <th>Total System Calls</th>\n",
       "      <th>Unique System Calls</th>\n",
       "      <th>Unique System Calls List</th>\n",
       "      <th>Total Errors</th>\n",
       "      <th>Unique Errors</th>\n",
       "      <th>Unique Errors List</th>\n",
       "      <th>File Operations</th>\n",
       "      <th>Unique File Operations</th>\n",
       "      <th>...</th>\n",
       "      <th>Pattern 1</th>\n",
       "      <th>Pattern 2</th>\n",
       "      <th>Pattern 3</th>\n",
       "      <th>Pattern 4</th>\n",
       "      <th>Pattern 5</th>\n",
       "      <th>Advanced Pattern 1</th>\n",
       "      <th>Advanced Pattern 2</th>\n",
       "      <th>Advanced Pattern 3</th>\n",
       "      <th>Advanced Pattern 4</th>\n",
       "      <th>Advanced Pattern 5</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>strace_output_18086.18086</td>\n",
       "      <td>0</td>\n",
       "      <td>0.737147</td>\n",
       "      <td>39</td>\n",
       "      <td>0</td>\n",
       "      <td>0.778207</td>\n",
       "      <td>6</td>\n",
       "      <td>8</td>\n",
       "      <td>0.713052</td>\n",
       "      <td>8</td>\n",
       "      <td>...</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>12</td>\n",
       "      <td>6</td>\n",
       "      <td>9</td>\n",
       "      <td>5</td>\n",
       "      <td>16</td>\n",
       "      <td>3</td>\n",
       "      <td>7</td>\n",
       "      <td>12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>strace_output_18086.18127</td>\n",
       "      <td>0</td>\n",
       "      <td>0.005216</td>\n",
       "      <td>34</td>\n",
       "      <td>11</td>\n",
       "      <td>0.003034</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>0.001649</td>\n",
       "      <td>6</td>\n",
       "      <td>...</td>\n",
       "      <td>6</td>\n",
       "      <td>11</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>11</td>\n",
       "      <td>8</td>\n",
       "      <td>15</td>\n",
       "      <td>10</td>\n",
       "      <td>14</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>strace_output_18086.18128</td>\n",
       "      <td>0</td>\n",
       "      <td>0.003420</td>\n",
       "      <td>21</td>\n",
       "      <td>19</td>\n",
       "      <td>0.004690</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0.002975</td>\n",
       "      <td>7</td>\n",
       "      <td>...</td>\n",
       "      <td>4</td>\n",
       "      <td>3</td>\n",
       "      <td>10</td>\n",
       "      <td>11</td>\n",
       "      <td>3</td>\n",
       "      <td>6</td>\n",
       "      <td>3</td>\n",
       "      <td>7</td>\n",
       "      <td>11</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>strace_output_18086.18129</td>\n",
       "      <td>0</td>\n",
       "      <td>0.000782</td>\n",
       "      <td>9</td>\n",
       "      <td>20</td>\n",
       "      <td>0.002759</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>0.000645</td>\n",
       "      <td>2</td>\n",
       "      <td>...</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>16</td>\n",
       "      <td>13</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>15</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>strace_output_18086.18130</td>\n",
       "      <td>0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>5</td>\n",
       "      <td>21</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0</td>\n",
       "      <td>12</td>\n",
       "      <td>0.000108</td>\n",
       "      <td>2</td>\n",
       "      <td>...</td>\n",
       "      <td>7</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>9</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>95</th>\n",
       "      <td>strace_output_44189.44248</td>\n",
       "      <td>1</td>\n",
       "      <td>0.003275</td>\n",
       "      <td>20</td>\n",
       "      <td>23</td>\n",
       "      <td>0.003862</td>\n",
       "      <td>2</td>\n",
       "      <td>7</td>\n",
       "      <td>0.002760</td>\n",
       "      <td>5</td>\n",
       "      <td>...</td>\n",
       "      <td>4</td>\n",
       "      <td>3</td>\n",
       "      <td>10</td>\n",
       "      <td>11</td>\n",
       "      <td>3</td>\n",
       "      <td>6</td>\n",
       "      <td>3</td>\n",
       "      <td>7</td>\n",
       "      <td>11</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>96</th>\n",
       "      <td>strace_output_44189.44250</td>\n",
       "      <td>1</td>\n",
       "      <td>0.005245</td>\n",
       "      <td>23</td>\n",
       "      <td>10</td>\n",
       "      <td>0.004690</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0.002688</td>\n",
       "      <td>5</td>\n",
       "      <td>...</td>\n",
       "      <td>6</td>\n",
       "      <td>10</td>\n",
       "      <td>5</td>\n",
       "      <td>7</td>\n",
       "      <td>10</td>\n",
       "      <td>8</td>\n",
       "      <td>14</td>\n",
       "      <td>4</td>\n",
       "      <td>8</td>\n",
       "      <td>13</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>97</th>\n",
       "      <td>strace_output_44189.44292</td>\n",
       "      <td>1</td>\n",
       "      <td>0.226482</td>\n",
       "      <td>40</td>\n",
       "      <td>2</td>\n",
       "      <td>0.496552</td>\n",
       "      <td>5</td>\n",
       "      <td>4</td>\n",
       "      <td>0.242248</td>\n",
       "      <td>7</td>\n",
       "      <td>...</td>\n",
       "      <td>5</td>\n",
       "      <td>2</td>\n",
       "      <td>12</td>\n",
       "      <td>12</td>\n",
       "      <td>8</td>\n",
       "      <td>7</td>\n",
       "      <td>2</td>\n",
       "      <td>9</td>\n",
       "      <td>12</td>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>98</th>\n",
       "      <td>strace_output_44189.44293</td>\n",
       "      <td>1</td>\n",
       "      <td>0.226482</td>\n",
       "      <td>40</td>\n",
       "      <td>2</td>\n",
       "      <td>0.496552</td>\n",
       "      <td>5</td>\n",
       "      <td>4</td>\n",
       "      <td>0.242248</td>\n",
       "      <td>7</td>\n",
       "      <td>...</td>\n",
       "      <td>5</td>\n",
       "      <td>2</td>\n",
       "      <td>12</td>\n",
       "      <td>12</td>\n",
       "      <td>8</td>\n",
       "      <td>7</td>\n",
       "      <td>2</td>\n",
       "      <td>9</td>\n",
       "      <td>12</td>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>99</th>\n",
       "      <td>strace_output_44189.44294</td>\n",
       "      <td>1</td>\n",
       "      <td>0.226482</td>\n",
       "      <td>40</td>\n",
       "      <td>2</td>\n",
       "      <td>0.496552</td>\n",
       "      <td>5</td>\n",
       "      <td>4</td>\n",
       "      <td>0.242248</td>\n",
       "      <td>7</td>\n",
       "      <td>...</td>\n",
       "      <td>5</td>\n",
       "      <td>2</td>\n",
       "      <td>12</td>\n",
       "      <td>12</td>\n",
       "      <td>8</td>\n",
       "      <td>7</td>\n",
       "      <td>2</td>\n",
       "      <td>9</td>\n",
       "      <td>12</td>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>100 rows × 148 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                     Filename  target  Total System Calls  \\\n",
       "0   strace_output_18086.18086       0            0.737147   \n",
       "1   strace_output_18086.18127       0            0.005216   \n",
       "2   strace_output_18086.18128       0            0.003420   \n",
       "3   strace_output_18086.18129       0            0.000782   \n",
       "4   strace_output_18086.18130       0            0.000000   \n",
       "..                        ...     ...                 ...   \n",
       "95  strace_output_44189.44248       1            0.003275   \n",
       "96  strace_output_44189.44250       1            0.005245   \n",
       "97  strace_output_44189.44292       1            0.226482   \n",
       "98  strace_output_44189.44293       1            0.226482   \n",
       "99  strace_output_44189.44294       1            0.226482   \n",
       "\n",
       "    Unique System Calls  Unique System Calls List  Total Errors  \\\n",
       "0                    39                         0      0.778207   \n",
       "1                    34                        11      0.003034   \n",
       "2                    21                        19      0.004690   \n",
       "3                     9                        20      0.002759   \n",
       "4                     5                        21      0.000000   \n",
       "..                  ...                       ...           ...   \n",
       "95                   20                        23      0.003862   \n",
       "96                   23                        10      0.004690   \n",
       "97                   40                         2      0.496552   \n",
       "98                   40                         2      0.496552   \n",
       "99                   40                         2      0.496552   \n",
       "\n",
       "    Unique Errors  Unique Errors List  File Operations  \\\n",
       "0               6                   8         0.713052   \n",
       "1               3                   2         0.001649   \n",
       "2               1                   0         0.002975   \n",
       "3               3                   1         0.000645   \n",
       "4               0                  12         0.000108   \n",
       "..            ...                 ...              ...   \n",
       "95              2                   7         0.002760   \n",
       "96              1                   0         0.002688   \n",
       "97              5                   4         0.242248   \n",
       "98              5                   4         0.242248   \n",
       "99              5                   4         0.242248   \n",
       "\n",
       "    Unique File Operations  ...  Pattern 1  Pattern 2  Pattern 3 Pattern 4  \\\n",
       "0                        8  ...          3          2         12         6   \n",
       "1                        6  ...          6         11          1         1   \n",
       "2                        7  ...          4          3         10        11   \n",
       "3                        2  ...          3          1          0        16   \n",
       "4                        2  ...          7          0          3         2   \n",
       "..                     ...  ...        ...        ...        ...       ...   \n",
       "95                       5  ...          4          3         10        11   \n",
       "96                       5  ...          6         10          5         7   \n",
       "97                       7  ...          5          2         12        12   \n",
       "98                       7  ...          5          2         12        12   \n",
       "99                       7  ...          5          2         12        12   \n",
       "\n",
       "    Pattern 5  Advanced Pattern 1 Advanced Pattern 2  Advanced Pattern 3  \\\n",
       "0           9                   5                 16                   3   \n",
       "1          11                   8                 15                  10   \n",
       "2           3                   6                  3                   7   \n",
       "3          13                   3                  1                   1   \n",
       "4           0                   9                  0                   0   \n",
       "..        ...                 ...                ...                 ...   \n",
       "95          3                   6                  3                   7   \n",
       "96         10                   8                 14                   4   \n",
       "97          8                   7                  2                   9   \n",
       "98          8                   7                  2                   9   \n",
       "99          8                   7                  2                   9   \n",
       "\n",
       "    Advanced Pattern 4 Advanced Pattern 5  \n",
       "0                    7                 12  \n",
       "1                   14                  5  \n",
       "2                   11                  4  \n",
       "3                   15                  1  \n",
       "4                    0                  0  \n",
       "..                 ...                ...  \n",
       "95                  11                  4  \n",
       "96                   8                 13  \n",
       "97                  12                 10  \n",
       "98                  12                 10  \n",
       "99                  12                 10  \n",
       "\n",
       "[100 rows x 148 columns]"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Preprocessing: fill missing values, label-encode the text columns and\n",
    "# min-max scale the count columns. This cell rebinds `df` (loaded in the\n",
    "# previous cell) to the preprocessed frame.\n",
    "from sklearn.preprocessing import LabelEncoder, MinMaxScaler\n",
    "\n",
    "# Handle missing values - fill missing patterns with 'Unknown'.\n",
    "# Columns that contain NaN are cast to object first: writing a string into a\n",
    "# float64 column relies on implicit upcasting, which pandas has deprecated\n",
    "# (a FutureWarning today, an error in a future version).\n",
    "columns_with_missing = df.columns[df.isna().any()]\n",
    "df = df.astype({column: object for column in columns_with_missing}).fillna('Unknown')\n",
    "\n",
    "# Encoding categorical data - convert the pattern and list columns to integer\n",
    "# codes using label encoding for now.\n",
    "columns_to_encode = ['Unique System Calls List', 'Unique Errors List', 'Unique File Operations List',\n",
    "                     'Pattern 1', 'Pattern 2', 'Pattern 3', 'Pattern 4', 'Pattern 5',\n",
    "                     'Advanced Pattern 1', 'Advanced Pattern 2', 'Advanced Pattern 3',\n",
    "                     'Advanced Pattern 4', 'Advanced Pattern 5']\n",
    "\n",
    "# One fitted encoder per column, stored by column name so the integer codes can\n",
    "# be mapped back with label_encoders[column].inverse_transform(...). A single\n",
    "# shared encoder would only remember the classes of the last column it saw.\n",
    "label_encoders = {}\n",
    "for column in columns_to_encode:\n",
    "    label_encoder = LabelEncoder()\n",
    "    df[column] = label_encoder.fit_transform(df[column])\n",
    "    label_encoders[column] = label_encoder\n",
    "\n",
    "# Normalize the numerical columns with min-max scaling.\n",
    "numerical_columns = ['Total System Calls', 'Total Errors', 'File Operations', 'Memory Operations',\n",
    "                     'Network Operations', 'Process Management Operations', 'I/O Operations',\n",
    "                     'Time Operations', 'Signal Operations', 'IPC Operations', 'Thread Operations',\n",
    "                     'Resource Operations', 'Filesystem Operations', 'Security Operations',\n",
    "                     'Miscellaneous Operations', 'Average System Calls per Line']\n",
    "\n",
    "scaler = MinMaxScaler()\n",
    "df[numerical_columns] = scaler.fit_transform(df[numerical_columns])\n",
    "\n",
    "# Display a short preview rather than the whole frame.\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "6909fe37-d0d9-4e0d-b881-218d163aab1c",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch 1/50\n",
      "\u001b[1m3/3\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m2s\u001b[0m 82ms/step - accuracy: 0.4078 - loss: 0.9044 - val_accuracy: 0.3000 - val_loss: 0.9251\n",
      "Epoch 2/50\n",
      "\u001b[1m3/3\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 24ms/step - accuracy: 0.5602 - loss: 0.7277 - val_accuracy: 0.2500 - val_loss: 0.8131\n",
      "Epoch 3/50\n",
      "\u001b[1m3/3\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 21ms/step - accuracy: 0.5750 - loss: 0.6699 - val_accuracy: 0.6000 - val_loss: 0.7336\n",
      "Epoch 4/50\n",
      "\u001b[1m3/3\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 28ms/step - accuracy: 0.5516 - loss: 0.6556 - val_accuracy: 0.6000 - val_loss: 0.7384\n",
      "Epoch 5/50\n",
      "\u001b[1m3/3\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 22ms/step - accuracy: 0.5039 - loss: 0.6627 - val_accuracy: 0.3500 - val_loss: 0.7729\n",
      "Epoch 6/50\n",
      "\u001b[1m3/3\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 16ms/step - accuracy: 0.5844 - loss: 0.6481 - val_accuracy: 0.3000 - val_loss: 0.7792\n",
      "Epoch 7/50\n",
      "\u001b[1m3/3\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 25ms/step - accuracy: 0.5547 - loss: 0.6323 - val_accuracy: 0.3000 - val_loss: 0.7681\n",
      "Epoch 8/50\n",
      "\u001b[1m3/3\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 25ms/step - accuracy: 0.6328 - loss: 0.6019 - val_accuracy: 0.4000 - val_loss: 0.7426\n",
      "Epoch 9/50\n",
      "\u001b[1m3/3\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 26ms/step - accuracy: 0.5961 - loss: 0.6336 - val_accuracy: 0.5500 - val_loss: 0.7327\n",
      "Epoch 10/50\n",
      "\u001b[1m3/3\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 17ms/step - accuracy: 0.5805 - loss: 0.6294 - val_accuracy: 0.4500 - val_loss: 0.7537\n",
      "Epoch 11/50\n",
      "\u001b[1m3/3\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 16ms/step - accuracy: 0.6469 - loss: 0.5955 - val_accuracy: 0.4500 - val_loss: 0.7593\n",
      "Epoch 12/50\n",
      "\u001b[1m3/3\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 17ms/step - accuracy: 0.6664 - loss: 0.5887 - val_accuracy: 0.4000 - val_loss: 0.7650\n",
      "Epoch 13/50\n",
      "\u001b[1m3/3\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 19ms/step - accuracy: 0.6727 - loss: 0.5834 - val_accuracy: 0.4000 - val_loss: 0.7674\n",
      "Epoch 14/50\n",
      "\u001b[1m3/3\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 22ms/step - accuracy: 0.6672 - loss: 0.5790 - val_accuracy: 0.4000 - val_loss: 0.7769\n",
      "Epoch 15/50\n",
      "\u001b[1m3/3\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 18ms/step - accuracy: 0.6531 - loss: 0.5801 - val_accuracy: 0.4000 - val_loss: 0.7774\n",
      "Epoch 16/50\n",
      "\u001b[1m3/3\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 17ms/step - accuracy: 0.6305 - loss: 0.5789 - val_accuracy: 0.4000 - val_loss: 0.7817\n",
      "Epoch 17/50\n",
      "\u001b[1m3/3\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 19ms/step - accuracy: 0.6492 - loss: 0.5746 - val_accuracy: 0.3000 - val_loss: 0.8110\n",
      "Epoch 18/50\n",
      "\u001b[1m3/3\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 23ms/step - accuracy: 0.6453 - loss: 0.5800 - val_accuracy: 0.3000 - val_loss: 0.8093\n",
      "Epoch 19/50\n",
      "\u001b[1m3/3\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 39ms/step - accuracy: 0.6406 - loss: 0.5590 - val_accuracy: 0.5000 - val_loss: 0.7687\n",
      "Epoch 20/50\n",
      "\u001b[1m3/3\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 26ms/step - accuracy: 0.6313 - loss: 0.5851 - val_accuracy: 0.6000 - val_loss: 0.7562\n",
      "Epoch 21/50\n",
      "\u001b[1m3/3\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 33ms/step - accuracy: 0.6211 - loss: 0.5925 - val_accuracy: 0.6000 - val_loss: 0.7597\n",
      "Epoch 22/50\n",
      "\u001b[1m3/3\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 71ms/step - accuracy: 0.6633 - loss: 0.5663 - val_accuracy: 0.3500 - val_loss: 0.8217\n",
      "Epoch 23/50\n",
      "\u001b[1m3/3\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 40ms/step - accuracy: 0.6453 - loss: 0.5629 - val_accuracy: 0.3000 - val_loss: 0.8637\n",
      "Epoch 24/50\n",
      "\u001b[1m3/3\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 54ms/step - accuracy: 0.6453 - loss: 0.5654 - val_accuracy: 0.3500 - val_loss: 0.8266\n",
      "Epoch 25/50\n",
      "\u001b[1m3/3\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 37ms/step - accuracy: 0.6445 - loss: 0.5954 - val_accuracy: 0.5500 - val_loss: 0.7952\n",
      "Epoch 26/50\n",
      "\u001b[1m3/3\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 56ms/step - accuracy: 0.6469 - loss: 0.5609 - val_accuracy: 0.5500 - val_loss: 0.8135\n",
      "Epoch 27/50\n",
      "\u001b[1m3/3\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 42ms/step - accuracy: 0.6438 - loss: 0.5491 - val_accuracy: 0.5500 - val_loss: 0.8112\n",
      "Epoch 28/50\n",
      "\u001b[1m3/3\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 41ms/step - accuracy: 0.6211 - loss: 0.5617 - val_accuracy: 0.5000 - val_loss: 0.8267\n",
      "Epoch 29/50\n",
      "\u001b[1m3/3\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 39ms/step - accuracy: 0.5914 - loss: 0.5690 - val_accuracy: 0.5000 - val_loss: 0.8138\n",
      "Epoch 30/50\n",
      "\u001b[1m3/3\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 36ms/step - accuracy: 0.6031 - loss: 0.5768 - val_accuracy: 0.5500 - val_loss: 0.8009\n",
      "Epoch 31/50\n",
      "\u001b[1m3/3\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 39ms/step - accuracy: 0.6086 - loss: 0.5608 - val_accuracy: 0.3500 - val_loss: 0.8583\n",
      "Epoch 32/50\n",
      "\u001b[1m3/3\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 43ms/step - accuracy: 0.6555 - loss: 0.5543 - val_accuracy: 0.4500 - val_loss: 0.8463\n",
      "Epoch 33/50\n",
      "\u001b[1m3/3\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 47ms/step - accuracy: 0.6633 - loss: 0.5517 - val_accuracy: 0.4500 - val_loss: 0.8612\n",
      "Epoch 34/50\n",
      "\u001b[1m3/3\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 40ms/step - accuracy: 0.6336 - loss: 0.5371 - val_accuracy: 0.4000 - val_loss: 0.8788\n",
      "Epoch 35/50\n",
      "\u001b[1m3/3\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 16ms/step - accuracy: 0.6766 - loss: 0.5319 - val_accuracy: 0.4000 - val_loss: 0.8658\n",
      "Epoch 36/50\n",
      "\u001b[1m3/3\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 21ms/step - accuracy: 0.6672 - loss: 0.5459 - val_accuracy: 0.5500 - val_loss: 0.8291\n",
      "Epoch 37/50\n",
      "\u001b[1m3/3\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 26ms/step - accuracy: 0.6086 - loss: 0.5331 - val_accuracy: 0.5000 - val_loss: 0.8434\n",
      "Epoch 38/50\n",
      "\u001b[1m3/3\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 18ms/step - accuracy: 0.6750 - loss: 0.5268 - val_accuracy: 0.4500 - val_loss: 0.8582\n",
      "Epoch 39/50\n",
      "\u001b[1m3/3\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 17ms/step - accuracy: 0.6727 - loss: 0.5244 - val_accuracy: 0.5500 - val_loss: 0.8239\n",
      "Epoch 40/50\n",
      "\u001b[1m3/3\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 18ms/step - accuracy: 0.6031 - loss: 0.5521 - val_accuracy: 0.6000 - val_loss: 0.8237\n",
      "Epoch 41/50\n",
      "\u001b[1m3/3\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 18ms/step - accuracy: 0.6531 - loss: 0.5492 - val_accuracy: 0.4000 - val_loss: 0.8650\n",
      "Epoch 42/50\n",
      "\u001b[1m3/3\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 17ms/step - accuracy: 0.6727 - loss: 0.5272 - val_accuracy: 0.3500 - val_loss: 0.9172\n",
      "Epoch 43/50\n",
      "\u001b[1m3/3\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 16ms/step - accuracy: 0.6531 - loss: 0.5503 - val_accuracy: 0.5000 - val_loss: 0.8663\n",
      "Epoch 44/50\n",
      "\u001b[1m3/3\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 20ms/step - accuracy: 0.6789 - loss: 0.5100 - val_accuracy: 0.6000 - val_loss: 0.8251\n",
      "Epoch 45/50\n",
      "\u001b[1m3/3\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 21ms/step - accuracy: 0.6625 - loss: 0.5547 - val_accuracy: 0.6000 - val_loss: 0.8240\n",
      "Epoch 46/50\n",
      "\u001b[1m3/3\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 18ms/step - accuracy: 0.6313 - loss: 0.5584 - val_accuracy: 0.3500 - val_loss: 0.8836\n",
      "Epoch 47/50\n",
      "\u001b[1m3/3\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 23ms/step - accuracy: 0.6492 - loss: 0.5479 - val_accuracy: 0.3500 - val_loss: 0.8951\n",
      "Epoch 48/50\n",
      "\u001b[1m3/3\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 18ms/step - accuracy: 0.6438 - loss: 0.5567 - val_accuracy: 0.5000 - val_loss: 0.8489\n",
      "Epoch 49/50\n",
      "\u001b[1m3/3\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 17ms/step - accuracy: 0.5953 - loss: 0.5454 - val_accuracy: 0.5500 - val_loss: 0.8557\n",
      "Epoch 50/50\n",
      "\u001b[1m3/3\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 16ms/step - accuracy: 0.6492 - loss: 0.5406 - val_accuracy: 0.5000 - val_loss: 0.9046\n"
     ]
    }
   ],
   "source": [
    "from tensorflow.keras.models import Model\n",
    "from tensorflow.keras.layers import Dense, Input, concatenate\n",
    "from sklearn.model_selection import train_test_split\n",
    "\n",
    "# Step 1: Separate features from the target (if you have a target)\n",
    "X = df.drop(columns=['Filename'])  # Drop Filename column\n",
    "y = df['target']  # Replace 'target_column' with the actual target column name\n",
    "\n",
    "# Step 2: Split the data into training and testing sets\n",
    "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n",
    "\n",
    "# Step 3: Define inputs for different modalities (numerical/categorical)\n",
    "input_numerical = Input(shape=(len(numerical_columns),), name='numerical_input')\n",
    "\n",
    "# You can define another input layer for categorical or pattern-based features if necessary\n",
    "input_categorical = Input(shape=(len(columns_to_encode),), name='categorical_input')\n",
    "\n",
    "# Step 4: Build submodels for different modalities\n",
    "# Example dense layers for numerical features\n",
    "x_num = Dense(128, activation='relu')(input_numerical)\n",
    "x_num = Dense(64, activation='relu')(x_num)\n",
    "\n",
    "# Example dense layers for categorical features\n",
    "x_cat = Dense(128, activation='relu')(input_categorical)\n",
    "x_cat = Dense(64, activation='relu')(x_cat)\n",
    "\n",
    "# Step 5: Combine the outputs from different modalities\n",
    "combined = concatenate([x_num, x_cat])\n",
    "\n",
    "# Step 6: Final output layers\n",
    "output = Dense(1, activation='sigmoid')(combined)  # For binary classification\n",
    "# output = Dense(3, activation='softmax')(combined)  # For multi-class classification\n",
    "\n",
    "# Step 7: Define the final mulimodal model\n",
    "model = Model(inputs=[input_numerical, input_categorical], outputs=output)\n",
    "\n",
    "# Step 8: Compile the mulimodal model\n",
    "model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])\n",
    "\n",
    "# Step 9: Train the mulimodal model\n",
    "history = model.fit([X_train[numerical_columns], X_train[columns_to_encode]], y_train, \n",
    "                    validation_data=([X_test[numerical_columns], X_test[columns_to_encode]], y_test),\n",
    "                    epochs=50, batch_size=32)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "e03f19f2-6846-4d4d-9457-b9349206cf65",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch 1/50\n",
      "\u001b[1m3/3\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m2s\u001b[0m 114ms/step - accuracy: 0.5242 - loss: 1.0732 - val_accuracy: 0.6500 - val_loss: 0.6456\n",
      "Epoch 2/50\n",
      "\u001b[1m3/3\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 17ms/step - accuracy: 0.4836 - loss: 0.7700 - val_accuracy: 0.6500 - val_loss: 0.6687\n",
      "Epoch 3/50\n",
      "\u001b[1m3/3\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 24ms/step - accuracy: 0.4500 - loss: 0.8150 - val_accuracy: 0.6500 - val_loss: 0.6690\n",
      "Epoch 4/50\n",
      "\u001b[1m3/3\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 30ms/step - accuracy: 0.5484 - loss: 0.6790 - val_accuracy: 0.4000 - val_loss: 0.7713\n",
      "Epoch 5/50\n",
      "\u001b[1m3/3\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 33ms/step - accuracy: 0.5641 - loss: 0.6748 - val_accuracy: 0.4000 - val_loss: 0.7986\n",
      "Epoch 6/50\n",
      "\u001b[1m3/3\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 33ms/step - accuracy: 0.5227 - loss: 0.6663 - val_accuracy: 0.4000 - val_loss: 0.7332\n",
      "Epoch 7/50\n",
      "\u001b[1m3/3\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 31ms/step - accuracy: 0.6383 - loss: 0.6263 - val_accuracy: 0.6000 - val_loss: 0.6972\n",
      "Epoch 8/50\n",
      "\u001b[1m3/3\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 22ms/step - accuracy: 0.5633 - loss: 0.6389 - val_accuracy: 0.6500 - val_loss: 0.6935\n",
      "Epoch 9/50\n",
      "\u001b[1m3/3\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 18ms/step - accuracy: 0.5789 - loss: 0.6339 - val_accuracy: 0.4000 - val_loss: 0.7126\n",
      "Epoch 10/50\n",
      "\u001b[1m3/3\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 30ms/step - accuracy: 0.6289 - loss: 0.6216 - val_accuracy: 0.3500 - val_loss: 0.7418\n",
      "Epoch 11/50\n",
      "\u001b[1m3/3\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 40ms/step - accuracy: 0.6586 - loss: 0.6168 - val_accuracy: 0.3500 - val_loss: 0.7782\n",
      "Epoch 12/50\n",
      "\u001b[1m3/3\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 36ms/step - accuracy: 0.6898 - loss: 0.6012 - val_accuracy: 0.3500 - val_loss: 0.7508\n",
      "Epoch 13/50\n",
      "\u001b[1m3/3\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 39ms/step - accuracy: 0.6172 - loss: 0.6172 - val_accuracy: 0.6000 - val_loss: 0.6990\n",
      "Epoch 14/50\n",
      "\u001b[1m3/3\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 19ms/step - accuracy: 0.6313 - loss: 0.6131 - val_accuracy: 0.4500 - val_loss: 0.7052\n",
      "Epoch 15/50\n",
      "\u001b[1m3/3\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 26ms/step - accuracy: 0.6586 - loss: 0.5924 - val_accuracy: 0.4000 - val_loss: 0.7295\n",
      "Epoch 16/50\n",
      "\u001b[1m3/3\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 24ms/step - accuracy: 0.6313 - loss: 0.6080 - val_accuracy: 0.4000 - val_loss: 0.7366\n",
      "Epoch 17/50\n",
      "\u001b[1m3/3\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 36ms/step - accuracy: 0.6547 - loss: 0.5695 - val_accuracy: 0.3500 - val_loss: 0.7742\n",
      "Epoch 18/50\n",
      "\u001b[1m3/3\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 27ms/step - accuracy: 0.6469 - loss: 0.5838 - val_accuracy: 0.3500 - val_loss: 0.7840\n",
      "Epoch 19/50\n",
      "\u001b[1m3/3\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 21ms/step - accuracy: 0.6352 - loss: 0.5789 - val_accuracy: 0.5500 - val_loss: 0.7285\n",
      "Epoch 20/50\n",
      "\u001b[1m3/3\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 22ms/step - accuracy: 0.6625 - loss: 0.5812 - val_accuracy: 0.5500 - val_loss: 0.7060\n",
      "Epoch 21/50\n",
      "\u001b[1m3/3\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 25ms/step - accuracy: 0.6766 - loss: 0.5757 - val_accuracy: 0.6000 - val_loss: 0.7148\n",
      "Epoch 22/50\n",
      "\u001b[1m3/3\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 22ms/step - accuracy: 0.6602 - loss: 0.5847 - val_accuracy: 0.4000 - val_loss: 0.7801\n",
      "Epoch 23/50\n",
      "\u001b[1m3/3\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 25ms/step - accuracy: 0.6047 - loss: 0.5915 - val_accuracy: 0.3500 - val_loss: 0.8396\n",
      "Epoch 24/50\n",
      "\u001b[1m3/3\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 23ms/step - accuracy: 0.6195 - loss: 0.6094 - val_accuracy: 0.4000 - val_loss: 0.7749\n",
      "Epoch 25/50\n",
      "\u001b[1m3/3\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 17ms/step - accuracy: 0.6195 - loss: 0.5836 - val_accuracy: 0.6000 - val_loss: 0.7348\n",
      "Epoch 26/50\n",
      "\u001b[1m3/3\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 34ms/step - accuracy: 0.6313 - loss: 0.5723 - val_accuracy: 0.6000 - val_loss: 0.7353\n",
      "Epoch 27/50\n",
      "\u001b[1m3/3\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 34ms/step - accuracy: 0.6703 - loss: 0.5596 - val_accuracy: 0.4500 - val_loss: 0.7609\n",
      "Epoch 28/50\n",
      "\u001b[1m3/3\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 39ms/step - accuracy: 0.6250 - loss: 0.5529 - val_accuracy: 0.3500 - val_loss: 0.8708\n",
      "Epoch 29/50\n",
      "\u001b[1m3/3\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 38ms/step - accuracy: 0.6469 - loss: 0.5828 - val_accuracy: 0.3500 - val_loss: 0.8170\n",
      "Epoch 30/50\n",
      "\u001b[1m3/3\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 36ms/step - accuracy: 0.6008 - loss: 0.5692 - val_accuracy: 0.5500 - val_loss: 0.7233\n",
      "Epoch 31/50\n",
      "\u001b[1m3/3\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 35ms/step - accuracy: 0.6047 - loss: 0.5771 - val_accuracy: 0.6000 - val_loss: 0.7201\n",
      "Epoch 32/50\n",
      "\u001b[1m3/3\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 35ms/step - accuracy: 0.6469 - loss: 0.5579 - val_accuracy: 0.4000 - val_loss: 0.7513\n",
      "Epoch 33/50\n",
      "\u001b[1m3/3\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 30ms/step - accuracy: 0.6570 - loss: 0.5492 - val_accuracy: 0.4000 - val_loss: 0.7917\n",
      "Epoch 34/50\n",
      "\u001b[1m3/3\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 32ms/step - accuracy: 0.6414 - loss: 0.5733 - val_accuracy: 0.4000 - val_loss: 0.7835\n",
      "Epoch 35/50\n",
      "\u001b[1m3/3\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 44ms/step - accuracy: 0.6125 - loss: 0.5520 - val_accuracy: 0.6500 - val_loss: 0.7482\n",
      "Epoch 36/50\n",
      "\u001b[1m3/3\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 25ms/step - accuracy: 0.6391 - loss: 0.5674 - val_accuracy: 0.5000 - val_loss: 0.7601\n",
      "Epoch 37/50\n",
      "\u001b[1m3/3\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 29ms/step - accuracy: 0.6391 - loss: 0.5455 - val_accuracy: 0.4500 - val_loss: 0.7854\n",
      "Epoch 38/50\n",
      "\u001b[1m3/3\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 34ms/step - accuracy: 0.5742 - loss: 0.5527 - val_accuracy: 0.3500 - val_loss: 0.8084\n",
      "Epoch 39/50\n",
      "\u001b[1m3/3\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 40ms/step - accuracy: 0.6109 - loss: 0.5505 - val_accuracy: 0.6000 - val_loss: 0.7346\n",
      "Epoch 40/50\n",
      "\u001b[1m3/3\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 39ms/step - accuracy: 0.6352 - loss: 0.5532 - val_accuracy: 0.5500 - val_loss: 0.7410\n",
      "Epoch 41/50\n",
      "\u001b[1m3/3\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 32ms/step - accuracy: 0.6305 - loss: 0.5387 - val_accuracy: 0.5000 - val_loss: 0.7641\n",
      "Epoch 42/50\n",
      "\u001b[1m3/3\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 37ms/step - accuracy: 0.6273 - loss: 0.5539 - val_accuracy: 0.3500 - val_loss: 0.8030\n",
      "Epoch 43/50\n",
      "\u001b[1m3/3\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 36ms/step - accuracy: 0.6336 - loss: 0.5572 - val_accuracy: 0.4000 - val_loss: 0.7801\n",
      "Epoch 44/50\n",
      "\u001b[1m3/3\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 35ms/step - accuracy: 0.6609 - loss: 0.5339 - val_accuracy: 0.5500 - val_loss: 0.7496\n",
      "Epoch 45/50\n",
      "\u001b[1m3/3\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 27ms/step - accuracy: 0.6727 - loss: 0.5452 - val_accuracy: 0.5500 - val_loss: 0.7371\n",
      "Epoch 46/50\n",
      "\u001b[1m3/3\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 31ms/step - accuracy: 0.6656 - loss: 0.5622 - val_accuracy: 0.4000 - val_loss: 0.7740\n",
      "Epoch 47/50\n",
      "\u001b[1m3/3\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 27ms/step - accuracy: 0.6180 - loss: 0.5382 - val_accuracy: 0.4500 - val_loss: 0.8200\n",
      "Epoch 48/50\n",
      "\u001b[1m3/3\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 23ms/step - accuracy: 0.6297 - loss: 0.5500 - val_accuracy: 0.4000 - val_loss: 0.8148\n",
      "Epoch 49/50\n",
      "\u001b[1m3/3\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 21ms/step - accuracy: 0.6172 - loss: 0.5443 - val_accuracy: 0.4000 - val_loss: 0.8065\n",
      "Epoch 50/50\n",
      "\u001b[1m3/3\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 32ms/step - accuracy: 0.6648 - loss: 0.5434 - val_accuracy: 0.4000 - val_loss: 0.8136\n",
      "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 60ms/step - accuracy: 0.4000 - loss: 0.8136\n",
      "Test Accuracy: 40.00%\n"
     ]
    }
   ],
   "source": [
    "import pandas as pd\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.preprocessing import LabelEncoder, MinMaxScaler\n",
    "from tensorflow.keras.models import Model\n",
    "from tensorflow.keras.layers import Dense, Input, concatenate\n",
    "\n",
    "# Load the preprocessed CSV file\n",
    "file_path = 'trace_statistics_with_advanced_patterns.csv'\n",
    "df = pd.read_csv(file_path)\n",
    "\n",
    "# Assuming 'target_column' is your label column and all other columns are features\n",
    "# Split the data into features and target\n",
    "X = df.drop(columns=['Filename'])  # Drop non-relevant columns like Filename\n",
    "y = df['target']  # Replace with the actual target column\n",
    "\n",
    "# Split numerical and categorical features\n",
    "numerical_columns = ['Total System Calls', 'Total Errors', 'File Operations', 'Memory Operations',\n",
    "                     'Network Operations', 'Process Management Operations', 'I/O Operations',\n",
    "                     'Time Operations', 'Signal Operations', 'IPC Operations', 'Thread Operations',\n",
    "                     'Resource Operations', 'Filesystem Operations', 'Security Operations',\n",
    "                     'Miscellaneous Operations', 'Average System Calls per Line']\n",
    "\n",
    "categorical_columns = ['Unique System Calls List', 'Unique Errors List', 'Unique File Operations List', \n",
    "                       'Pattern 1', 'Pattern 2', 'Pattern 3', 'Pattern 4', 'Pattern 5',\n",
    "                       'Advanced Pattern 1', 'Advanced Pattern 2', 'Advanced Pattern 3', \n",
    "                       'Advanced Pattern 4', 'Advanced Pattern 5']\n",
    "\n",
    "# Split the data into training and testing sets\n",
    "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n",
    "\n",
    "# Normalize numerical columns\n",
    "scaler = MinMaxScaler()\n",
    "X_train_numerical = scaler.fit_transform(X_train[numerical_columns])\n",
    "X_test_numerical = scaler.transform(X_test[numerical_columns])\n",
    "\n",
    "# Initialize label encoder\n",
    "label_encoder = LabelEncoder()\n",
    "\n",
    "# Combine the training and testing data for label encoding\n",
    "combined_data = pd.concat([X_train, X_test])\n",
    "\n",
    "# Fit label encoder on the combined data for each categorical column\n",
    "for col in categorical_columns:\n",
    "    label_encoder.fit(combined_data[col])  # Fit on combined data\n",
    "    X_train[col] = label_encoder.transform(X_train[col])  # Transform training data\n",
    "    X_test[col] = label_encoder.transform(X_test[col])  # Transform test data\n",
    "\n",
    "# Define the numerical input\n",
    "input_numerical = Input(shape=(len(numerical_columns),), name='numerical_input')\n",
    "\n",
    "# Define the categorical input\n",
    "input_categorical = Input(shape=(len(categorical_columns),), name='categorical_input')\n",
    "\n",
    "# Create the numerical processing branch\n",
    "x_num = Dense(128, activation='relu')(input_numerical)\n",
    "x_num = Dense(64, activation='relu')(x_num)\n",
    "\n",
    "# Create the categorical processing branch\n",
    "x_cat = Dense(128, activation='relu')(input_categorical)\n",
    "x_cat = Dense(64, activation='relu')(x_cat)\n",
    "\n",
    "# Combine both branches\n",
    "combined = concatenate([x_num, x_cat])\n",
    "\n",
    "# Add final output layer (assuming binary classification, adjust for multi-class)\n",
    "output = Dense(1, activation='sigmoid')(combined)\n",
    "\n",
    "# Define the final model\n",
    "model = Model(inputs=[input_numerical, input_categorical], outputs=output)\n",
    "\n",
    "# Compile the model\n",
    "model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])\n",
    "\n",
    "# Train the model\n",
    "history = model.fit([X_train_numerical, X_train[categorical_columns]], y_train,\n",
    "                    validation_data=([X_test_numerical, X_test[categorical_columns]], y_test),\n",
    "                    epochs=50, batch_size=32)\n",
    "\n",
    "# Evaluate the model on the test set\n",
    "loss, accuracy = model.evaluate([X_test_numerical, X_test[categorical_columns]], y_test)\n",
    "print(f\"Test Accuracy: {accuracy * 100:.2f}%\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "188b803a-42e3-4b7f-a859-a265619136b9",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "88bb424d-254e-4db0-8ab2-b776445bd05c",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
