{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "/tf/mnt/RobustLearning/Rev/Temp\r\n"
     ]
    }
   ],
   "source": [
    "!pwd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/tensorflow/python/compat/v2_compat.py:101: disable_resource_variables (from tensorflow.python.ops.variable_scope) is deprecated and will be removed in a future version.\n",
      "Instructions for updating:\n",
      "non-resource variables are not supported in the long term\n"
     ]
    }
   ],
   "source": [
    "from sklearn.model_selection import train_test_split\n",
    "import tensorflow.compat.v1 as tf\n",
    "tf.disable_v2_behavior()\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "import os\n",
    "\n",
    "#import optuna\n",
    "\n",
    "from model_en import Model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "import scipy\n",
    "from scipy.io import loadmat"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "import glob\n",
    "data_dir='./'\n",
    "datasets=[]\n",
    "    \n",
    "for file in glob.glob(f\"{data_dir}/*npz\"): \n",
    "    datasets.append(file.split(\"/\")[-1])\n",
    "from scipy import io"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['Lymphography.npz', 'musk.npz']"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "datasets"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "import sklearn\n",
    "from sklearn.preprocessing import MinMaxScaler"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "def load(name):\n",
    "    data = np.load(data_dir+name,allow_pickle=True)\n",
    "    X = data['X']\n",
    "    labels = ((data['y']).astype(np.int)).reshape(-1)\n",
    "    X_non_zero=X[:,np.where(np.sum(np.abs(X),axis=0)>0)[0]]\n",
    "    X_non_zero=X[:,np.where(np.std(X,axis=0)>1e-8)[0]]\n",
    "    samples= scipy.stats.zscore(X_non_zero, axis=0)\n",
    "    \n",
    "    perm_ind=np.random.permutation(samples.shape[0])\n",
    "    samples_perm=samples[perm_ind]\n",
    "    labels_perm=labels[perm_ind]\n",
    "    num_split = int(len(samples)*0.5)\n",
    "    train_data = samples_perm[:num_split]\n",
    "\n",
    "    \n",
    "    \n",
    "    test_data = samples_perm[num_split:]\n",
    "     \n",
    "    train_label = labels_perm[:num_split]\n",
    "    test_label = labels_perm[num_split:]\n",
    "    \n",
    "    train_sort_ind=np.argsort(train_label)\n",
    "    train_data=train_data[train_sort_ind]\n",
    "    train_label=train_label[train_sort_ind]\n",
    "    return train_data, train_label, test_data, test_label"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "def train_test_split(inliers,outliers):\n",
    "    num_split = len(inliers) // 2\n",
    "    train_data = inliers[:num_split]\n",
    "    train_label = np.zeros(num_split)\n",
    "    test_data = np.concatenate([inliers[num_split:],outliers],0)\n",
    "\n",
    "    test_label = np.zeros(test_data.shape[0])\n",
    "    test_label[num_split:]=1\n",
    "    return train_data, train_label, test_data, test_label"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "import time as time\n",
    "from sklearn.metrics import roc_auc_score\n",
    "from sklearn.metrics import average_precision_score"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [],
   "source": [
    "auc_inductive=[]\n",
    "auc_transductive=[]\n",
    "\n",
    "\n",
    "for data in datasets:\n",
    "    \n",
    "\n",
    "    for runs in np.arange(10):\n",
    "        train_data, train_label, test_data, test_label=load(data)\n",
    "        while ((np.mean(test_label)==0)|(np.mean(train_label)==0)): \n",
    "            train_data, train_label, test_data, test_label=load(data)\n",
    "            print('re-split')\n",
    "        params  ={}\n",
    "        M=1\n",
    "        d=100\n",
    "        params['hidden_layers_node'] = [d,d,d,d,d,d,d,M,d,d,d,d,d,d,d]\n",
    "        params['lam'] =1\n",
    "        params['activation'] ='tanh'#\n",
    "        params['input_node']=train_data.shape[1]\n",
    "\n",
    "        params['embd_layer']=4\n",
    "        params['display_step']=200\n",
    "        params['sample_size']=train_data.shape[0]\n",
    "        params['batch_size']=np.int(train_data.shape[0]/10)\n",
    "        params['sample_selection']=True\n",
    "        params['normalize']=True\n",
    "        params['optimizer']='Adam'\n",
    "        learning_rate =0.01*(train_data.shape[0])/10000\n",
    "        params['sigma'] =1\n",
    "        model_dir=None\n",
    "        num_epoch=80\n",
    "        model = Model(**params)\n",
    "\n",
    "        train_losses, val_losses,best_gates = model.train(train_data,test_data, model_dir,learning_rate=learning_rate, num_epoch=num_epoch,train_gates=1)\n",
    "\n",
    "        X_rec,X_emb=model.test(test_data)\n",
    "        auc_inductive.append(roc_auc_score(test_label[:],np.linalg.norm(test_data-X_rec,axis=1)))\n",
    "\n",
    "        X_rec_t,X_emb=model.test(train_data)\n",
    "        auc_transductive.append(roc_auc_score(train_label[:],np.linalg.norm(train_data-X_rec_t,axis=1)))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[1.0,\n",
       " 1.0,\n",
       " 1.0,\n",
       " 1.0,\n",
       " 1.0,\n",
       " 0.9930555555555556,\n",
       " 1.0,\n",
       " 1.0,\n",
       " 0.9906103286384976,\n",
       " 1.0]"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "auc_inductive"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
