{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "[STEP 1]: Upload cora dataset.\n",
      "| # of nodes : 2708\n",
      "| # of edges : 5278.0\n",
      "| # of features : 1433\n",
      "| # of clases   : 7\n",
      "| # of train set : 140\n",
      "| # of val set   : 500\n",
      "| # of test set  : 1000\n",
      "xxxxxxxxxx Evaluation begin xxxxxxxxxx\n",
      "0.869 0.864\n",
      "0.865 0.862\n",
      "0.864 0.862\n",
      "0.861 0.859\n",
      "0.858 0.863\n",
      "0.854 0.857\n",
      "0.847 0.846\n",
      "0.842 0.843\n",
      "0.836 0.839\n",
      "0.834 0.831\n",
      "xxxxxxxxxx Evaluation end xxxxxxxxxx\n"
     ]
    }
   ],
   "source": [
    "import numpy as np\n",
    "import pickle as pkl\n",
    "import networkx as nx\n",
    "import scipy.sparse as sp\n",
    "import torch\n",
    "import random\n",
    "import copy\n",
    "import sys\n",
    "import os\n",
    "import time\n",
    "import argparse\n",
    "import json\n",
    "import numpy as np\n",
    "import numpy.linalg as la\n",
    "import torch.nn.functional as F\n",
    "import torch.optim as optim\n",
    "import torch.nn as nn\n",
    "import pandas as pd\n",
    "from scipy.sparse import csgraph\n",
    "from torch.backends import cudnn\n",
    "from torch.optim import lr_scheduler\n",
    "from utils import *\n",
    "from graphConvolution import *\n",
    "\n",
    "#hyperparameters\n",
    "num_node = 2708\n",
    "num_class = 7\n",
    "num_aval = 100\n",
    "num_coreset = 18*num_class*(num_class-1)\n",
    "#num_coreset = 20\n",
    "batch_size = 5\n",
    "\n",
    "os.environ[\"CUDA_VISIBLE_DEVICES\"] = '1'\n",
    "cudnn.benchmark = False            # if benchmark=True, deterministic will be False\n",
    "cudnn.deterministic = True\n",
    "#num_coreset = int((num_node-1500)*0.01)\n",
    "hidden_size = 128\n",
    "num_val = 500\n",
    "num_test = 1000\n",
    "\n",
    "    \n",
    "def get_receptive_fields_dense(cur_neighbors, selected_node, weighted_score):\n",
    "    \"\"\"Return the dot product of ``weighted_score`` with one row of ``adj_matrix2``.\n",
    "\n",
    "    The row is ``adj_matrix2[selected_node]`` (module-level matrix).\n",
    "    ``cur_neighbors`` is accepted but not used.\n",
    "    \"\"\"\n",
    "    # Adding 0 produces a fresh array rather than a view of the global matrix.\n",
    "    row_copy = adj_matrix2[selected_node] + 0\n",
    "    return weighted_score.dot(row_copy)\n",
    "\n",
    "def get_current_neighbors_dense(cur_nodes):\n",
    "    \"\"\"Build a 0/1 indicator over the columns of ``adj_matrix2`` for ``cur_nodes``.\n",
    "\n",
    "    A column is marked 1 when the sum of its entries over the rows listed in\n",
    "    ``cur_nodes`` is non-zero. Returns the integer 0 when ``cur_nodes`` is empty.\n",
    "    \"\"\"\n",
    "    how_many = np.array(cur_nodes).shape[0]\n",
    "    if how_many == 0:\n",
    "        return 0\n",
    "    column_totals = adj_matrix2[list(cur_nodes)].sum(axis=0)\n",
    "    # Adding 0 converts the boolean mask to integers.\n",
    "    return (column_totals != 0) + 0\n",
    "\n",
    "def get_current_neighbors_1(cur_nodes):\n",
    "    \"\"\"Build a 0/1 indicator over the columns of ``adj_matrix`` for ``cur_nodes``.\n",
    "\n",
    "    A column is marked 1 when the sum of its entries over the rows listed in\n",
    "    ``cur_nodes`` is non-zero. Returns the integer 0 when ``cur_nodes`` is empty.\n",
    "    \"\"\"\n",
    "    how_many = np.array(cur_nodes).shape[0]\n",
    "    if how_many == 0:\n",
    "        return 0\n",
    "    column_totals = adj_matrix[list(cur_nodes)].sum(axis=0)\n",
    "    # Adding 0 converts the boolean mask to integers.\n",
    "    return (column_totals != 0) + 0\n",
    "\n",
    "def get_entropy_contribute(npy_m1,npy_m2):\n",
    "    entro1 = 0\n",
    "    entro2 = 0\n",
    "    for i in range(npy_m1.shape[0]):\n",
    "        entro1 -= np.sum(npy_m1[i]*np.log2(npy_m1[i]))\n",
    "        entro2 -= np.sum(npy_m2[i]*np.log2(npy_m2[i]))\n",
    "    return entro1 - entro2\n",
    "\n",
    "def get_max_info_entropy_node_set(idx_used,high_score_nodes):\n",
    "    \"\"\"Greedily choose ``batch_size`` nodes from ``high_score_nodes``.\n",
    "\n",
    "    In every round each remaining candidate is scored by\n",
    "    ``sum_j model_prediction[node][j] * get_entropy_contribute(aay, aay_j)``,\n",
    "    where ``aay`` is ``adj_matrix2`` (restricted to the candidate's neighbour\n",
    "    rows) times the working label matrix and ``aay_j`` is the same product\n",
    "    after the candidate's row is replaced by the one-hot label for class ``j``.\n",
    "    Classes with zero predicted probability are skipped. The best candidate\n",
    "    is selected, its working label row is set to ``model_prediction`` for\n",
    "    that node, and it is removed from the candidate list.\n",
    "\n",
    "    Reads the module-level ``labels``, ``model_prediction``, ``adj_matrix2``,\n",
    "    ``batch_size`` and ``num_class``; none of them is modified (work is done\n",
    "    on deep copies).\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    idx_used : accepted but not used.\n",
    "    high_score_nodes : list of candidate node ids; it must hold at least\n",
    "        ``batch_size`` entries.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    list\n",
    "        The selected node ids, in selection order.\n",
    "    \"\"\"\n",
    "    max_info_node_set = []\n",
    "    high_score_nodes_ = copy.deepcopy(high_score_nodes)\n",
    "    labels_ = copy.deepcopy(labels)\n",
    "    for _round in range(batch_size):\n",
    "        score_list = np.zeros(len(high_score_nodes_))\n",
    "        for i, node in enumerate(high_score_nodes_):\n",
    "            # get_current_neighbors_dense returns a 0/1 indicator vector. It has\n",
    "            # to be turned into row indices first: indexing adj_matrix2 with the\n",
    "            # raw 0/1 values selects only rows 0 and 1, once per node.\n",
    "            neighbor_rows = np.flatnonzero(get_current_neighbors_dense([node]))\n",
    "            adj_neigh = adj_matrix2[neighbor_rows]\n",
    "            aay = np.matmul(adj_neigh,labels_)\n",
    "            labels_tmp = copy.deepcopy(labels_)\n",
    "            total_score = 0\n",
    "            for j in range(num_class):\n",
    "                if model_prediction[node][j] != 0:\n",
    "                    # Pretend the node were labelled with class j.\n",
    "                    labels_tmp[node] = 0\n",
    "                    labels_tmp[node][j] = 1\n",
    "                    aay_ = np.matmul(adj_neigh,labels_tmp)\n",
    "                    total_score += model_prediction[node][j]*get_entropy_contribute(aay,aay_)\n",
    "            score_list[i] = total_score\n",
    "        idx = np.argmax(score_list)\n",
    "        max_node = high_score_nodes_[idx]\n",
    "        max_info_node_set.append(max_node)\n",
    "        # Later rounds see the chosen node with its predicted distribution as label.\n",
    "        labels_[max_node] = model_prediction[max_node]\n",
    "        high_score_nodes_.remove(max_node)\n",
    "    return max_info_node_set\n",
    "\n",
    "def aug_normalized_adjacency(adj):\n",
    "    adj = adj + sp.eye(adj.shape[0])\n",
    "    adj = sp.coo_matrix(adj)\n",
    "    row_sum = np.array(adj.sum(1))\n",
    "    d_inv_sqrt = np.power(row_sum, -1).flatten()\n",
    "    d_inv_sqrt[np.isinf(d_inv_sqrt)] = 0.\n",
    "    d_mat_inv_sqrt = sp.diags(d_inv_sqrt)\n",
    "    return d_mat_inv_sqrt.dot(adj).tocoo()\n",
    "\n",
    "class GCN(nn.Module):\n",
    "    \"\"\"Two stacked ``GraphConvolution`` layers with dropout before each one.\n",
    "\n",
    "    ``forward`` returns the raw output of the second layer (no softmax).\n",
    "    \"\"\"\n",
    "\n",
    "    def __init__(self, nfeat, nhid, nclass, dropout):\n",
    "        \"\"\"Create the layers ``nfeat -> nhid -> nclass`` and store the dropout rate.\"\"\"\n",
    "        super().__init__()\n",
    "        self.dropout = dropout\n",
    "        self.gc1 = GraphConvolution(nfeat, nhid, bias=True)\n",
    "        self.gc2 = GraphConvolution(nhid, nclass, bias=True)\n",
    "\n",
    "    def forward(self, x, adj):\n",
    "        \"\"\"Apply dropout, layer one with ReLU, dropout again, then layer two.\"\"\"\n",
    "        hidden = F.dropout(x, self.dropout, training=self.training)\n",
    "        hidden = F.relu(self.gc1(hidden, adj))\n",
    "        hidden = F.dropout(hidden, self.dropout, training=self.training)\n",
    "        return self.gc2(hidden, adj)\n",
    "   \n",
    "def train(epoch, model,record,optimizer):\n",
    "    \"\"\"Run one optimisation step and record validation/test accuracy.\n",
    "\n",
    "    Reads the module-level ``features_GCN``, ``adj``, ``labels``, ``tmp_labels``,\n",
    "    ``idx_train1``, ``idx_train2``, ``idx_val`` and ``idx_test``.\n",
    "\n",
    "    The training loss is the cross-entropy on ``idx_train1`` against\n",
    "    ``tmp_labels`` plus, when ``idx_train2`` is not empty, the KL divergence\n",
    "    on ``idx_train2`` between the log-softmax of the output and\n",
    "    ``softmax(labels)``.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    epoch : accepted but not used.\n",
    "    model : the network to train; it is left in eval mode on return.\n",
    "    record : dict updated in place with ``record[val_accuracy] = test_accuracy``;\n",
    "        an epoch with an already-seen validation accuracy overwrites the entry.\n",
    "    optimizer : optimizer bound to ``model``'s parameters.\n",
    "    \"\"\"\n",
    "    model.train()\n",
    "    optimizer.zero_grad()\n",
    "    output = model(features_GCN, adj)\n",
    "    loss_train = F.cross_entropy(output[idx_train1], tmp_labels[idx_train1])\n",
    "    # With no soft-labelled nodes the default 'mean' reduction of KLDivLoss would\n",
    "    # average over zero elements and turn the whole loss into nan, so skip the term.\n",
    "    if len(idx_train2) > 0:\n",
    "        # NOTE(review): the target is softmax(labels) although the soft-label rows\n",
    "        # look like they are already normalised, and PyTorch documents\n",
    "        # reduction='batchmean' as the mathematically correct KL value - confirm\n",
    "        # both are intended before changing either.\n",
    "        loss_train2_f = nn.KLDivLoss()\n",
    "        loss_train2 = loss_train2_f(F.log_softmax(output[idx_train2],dim=1), F.softmax(labels[idx_train2],dim=1))\n",
    "        loss_train = loss_train + loss_train2\n",
    "    loss_train.backward()\n",
    "    optimizer.step()\n",
    "\n",
    "    # Evaluation pass: dropout off and no autograd graph needed.\n",
    "    model.eval()\n",
    "    with torch.no_grad():\n",
    "        output = model(features_GCN, adj)\n",
    "        acc_val = accuracy(output[idx_val], tmp_labels[idx_val])\n",
    "        acc_test = accuracy(output[idx_test], tmp_labels[idx_test])\n",
    "    record[acc_val.item()] = acc_test.item()\n",
    "\n",
    "def update_model_prediction():\n",
    "    \"\"\"Train a fresh GCN for 400 epochs and return its class probabilities.\n",
    "\n",
    "    The network is built from the module-level ``features_GCN``, ``hidden_size``\n",
    "    and ``num_class``, moved to the GPU and optimised with Adam through\n",
    "    ``train``. The return value is the row-wise softmax of the final forward\n",
    "    pass as a NumPy array.\n",
    "    \"\"\"\n",
    "    net = GCN(nfeat=features_GCN.shape[1],\n",
    "              nhid=hidden_size,\n",
    "              nclass=num_class,\n",
    "              dropout=0.85)\n",
    "    net.cuda()\n",
    "    adam = optim.Adam(net.parameters(), lr=0.05, weight_decay=5e-4)\n",
    "    history = {}\n",
    "    for epoch in range(400):\n",
    "        train(epoch, net, history, adam)\n",
    "    # train() switches the network to eval mode before returning, so this\n",
    "    # forward pass runs without dropout.\n",
    "    to_probabilities = nn.Softmax(dim=1)\n",
    "    probabilities = to_probabilities(net(features_GCN, adj))\n",
    "    return np.array(probabilities.detach().cpu())\n",
    "    \n",
    "\n",
    "\n",
    "#read dataset\n",
    "adj, features, labels, idx_train, idx_val, idx_test = load_data(dataset=\"cora\")\n",
    "# tmp_labels keeps the labels returned by load_data on the GPU; the name `labels`\n",
    "# is re-bound further down to a (num_node, num_class) soft-label matrix.\n",
    "tmp_labels = copy.deepcopy(labels)\n",
    "tmp_labels = tmp_labels.cuda()\n",
    "num_zeros = np.zeros(num_node)\n",
    "num_ones = np.ones(num_node)\n",
    "idx_val = list(idx_val.cpu())\n",
    "idx_test = list(idx_test.cpu())\n",
    "# Only nodes outside the validation and test splits may be queried. Sets of plain\n",
    "# ints give O(1) membership tests instead of scanning two lists for every node.\n",
    "held_out = {int(v) for v in idx_val} | {int(v) for v in idx_test}\n",
    "idx_avaliable = [i for i in range(num_node) if i not in held_out]\n",
    "\n",
    "#compute normalized distance\n",
    "adj = aug_normalized_adjacency(adj)\n",
    "adj_matrix = torch.FloatTensor(adj.todense()).cuda()\n",
    "# adj_matrix2 is the normalized adjacency multiplied by itself.\n",
    "adj_matrix2 = torch.mm(adj_matrix,adj_matrix).cuda()\n",
    "adj_matrix2 = np.array(adj_matrix2.cpu())\n",
    "adj = sparse_mx_to_torch_sparse_tensor(adj).float().cuda()\n",
    "features_GCN = copy.deepcopy(features)\n",
    "features_GCN = torch.FloatTensor(features_GCN).cuda()\n",
    "\n",
    "# Before any query every node's prediction and soft label is the uniform distribution.\n",
    "model_prediction = np.full((num_node,num_class),1/num_class)\n",
    "labels = np.full((num_node,num_class),1/num_class)\n",
    "\n",
    "adj_matrix = np.array(adj_matrix.cpu())\n",
    "degree_result = []\n",
    "nodes_degree = []\n",
    "count = 0\n",
    "# Degree of node i = number of non-zero entries in row i of adj_matrix\n",
    "# (the self-loop added by aug_normalized_adjacency is counted).\n",
    "for i in range(num_node):\n",
    "    cur_neighbors = get_current_neighbors_1([i])\n",
    "    tmp_degree = float(np.ones(num_node).dot(cur_neighbors))\n",
    "    nodes_degree.append([i,tmp_degree])\n",
    "nodes_degree.sort(key = lambda x:x[1])\n",
    "# Walk the ascending sort backwards so degree_result lists the queryable nodes\n",
    "# from highest to lowest degree.\n",
    "queryable = set(idx_avaliable)\n",
    "for entry in reversed(nodes_degree):\n",
    "    if entry[0] in queryable:\n",
    "        degree_result.append(entry[0])\n",
    "label_flag = np.ones((num_node,num_class))\n",
    "# The candidate pool below is degree_result[0:num_aval], so refills have to start\n",
    "# right after it. Starting at 0 would re-append nodes that are already in the\n",
    "# pool or that were just removed from it.\n",
    "degree_flag = num_aval\n",
    "# Seed training set: 2*num_class random queryable nodes with their true labels.\n",
    "idx_train = random.sample(degree_result,2*num_class)\n",
    "for node in idx_train:\n",
    "    degree_result.remove(node)\n",
    "    label_ = tmp_labels[node].item()\n",
    "    labels[node] = 0\n",
    "    labels[node][label_] = 1\n",
    "idx_train1 = []\n",
    "idx_train2 = []\n",
    "for node in idx_train:\n",
    "    if np.max(labels[node]) == 1:\n",
    "        idx_train1.append(node)\n",
    "    else:\n",
    "        idx_train2.append(node)\n",
    "# NOTE(review): both index tensors are built from idx_train, which discards the\n",
    "# split computed just above. Every seed node is hard-labelled at this point, so a\n",
    "# faithful split would leave idx_train2 empty and train() would average its KL\n",
    "# term over zero nodes. Confirm the intent before changing these two lines.\n",
    "idx_train1 = torch.LongTensor(idx_train).cuda()\n",
    "idx_train2 = torch.LongTensor(idx_train).cuda()\n",
    "\n",
    "# Candidate pool: the num_aval highest-degree queryable nodes not used as seeds.\n",
    "idx_avaliable = degree_result[0:num_aval]\n",
    "idx_avaliable_temp = copy.deepcopy(idx_avaliable)\n",
    "labels = torch.FloatTensor(labels).cuda()\n",
    "idx_train = torch.LongTensor(idx_train).cuda()\n",
    "idx_val = torch.LongTensor(idx_val).cuda()\n",
    "idx_test = torch.LongTensor(idx_test).cuda()\n",
    "count = 0\n",
    "break_list = []\n",
    "model_prediction = update_model_prediction()\n",
    "# The selection loop works on Python lists / NumPy arrays, so convert back.\n",
    "idx_train = list(idx_train.cpu())\n",
    "labels = np.array(labels.cpu())\n",
    "# Selection loop: each round picks batch_size nodes, checks the model's top class\n",
    "# for each against the true label, updates the soft labels, retrains the model and\n",
    "# refills the candidate pool. It stops once count reaches num_coreset.\n",
    "while True:\n",
    "    t1 = time.time()\n",
    "    max_info_entropy_node_set = get_max_info_entropy_node_set(idx_train,idx_avaliable_temp) \n",
    "    # cnt1 counts the nodes that leave the pool in this round.\n",
    "    cnt1 = 0\n",
    "    for i in range(batch_size):      \n",
    "        max_info_entropy_node = max_info_entropy_node_set[i]\n",
    "        if max_info_entropy_node not in idx_train:\n",
    "            idx_train.append(max_info_entropy_node)        \n",
    "        # Every num_class*(num_class-1) queries, log the sizes of the training set\n",
    "        # and of its hard-labelled / soft-labelled parts in break_list.\n",
    "        if count%(num_class*(num_class-1))==0:\n",
    "            idx_train1 = []\n",
    "            idx_train2 = []\n",
    "            for node in idx_train:\n",
    "                if np.max(labels[node]) == 1:\n",
    "                    idx_train1.append(node)\n",
    "                else:\n",
    "                    idx_train2.append(node)\n",
    "            break_list.append([len(idx_train),len(idx_train1),len(idx_train2)])\n",
    "        count += 1     \n",
    "        # Compare the model's most likely class with the true label.\n",
    "        tmp_class = np.argmax(model_prediction[max_info_entropy_node])\n",
    "        if tmp_class == tmp_labels[max_info_entropy_node].item():\n",
    "            # Correct guess: the node gets a one-hot label and leaves the pool.\n",
    "            labels[max_info_entropy_node] = 0\n",
    "            labels[max_info_entropy_node][tmp_class] = 1\n",
    "            idx_avaliable.remove(max_info_entropy_node)\n",
    "            idx_avaliable_temp.remove(max_info_entropy_node)\n",
    "            cnt1+=1\n",
    "        else:\n",
    "            # Wrong guess: rule that class out in label_flag and renormalise the\n",
    "            # remaining predicted probabilities into the node's soft label.\n",
    "            label_flag[max_info_entropy_node][tmp_class] = 0\n",
    "            pred_vec = model_prediction[max_info_entropy_node]*label_flag[max_info_entropy_node]\n",
    "            pred_sum = np.sum(pred_vec)\n",
    "            for j in range(num_class):\n",
    "                labels[max_info_entropy_node][j] = pred_vec[j]/pred_sum\n",
    "            # A renormalised maximum of exactly 1 means all mass sits on one class;\n",
    "            # the node is then treated like a hard-labelled one and leaves the pool.\n",
    "            if np.max(labels[max_info_entropy_node]) == 1:\n",
    "                idx_avaliable.remove(max_info_entropy_node)\n",
    "                idx_avaliable_temp.remove(max_info_entropy_node)\n",
    "                cnt1+=1\n",
    "    # Rebuild the hard (max label == 1) / soft split that train() reads.\n",
    "    idx_train1 = []\n",
    "    idx_train2 = []\n",
    "    for node in idx_train:\n",
    "        if np.max(labels[node]) == 1:\n",
    "            idx_train1.append(node)\n",
    "        else:\n",
    "            idx_train2.append(node)\n",
    "    # train() indexes GPU tensors, so convert before retraining.\n",
    "    idx_train1 = torch.LongTensor(idx_train1).cuda()\n",
    "    idx_train2 = torch.LongTensor(idx_train2).cuda()\n",
    "    labels = torch.FloatTensor(labels).cuda()\n",
    "    idx_train = torch.LongTensor(idx_train).cuda()\n",
    "    model_prediction = update_model_prediction()\n",
    "    # Zero out the classes already ruled out for each node.\n",
    "    model_prediction = model_prediction * label_flag\n",
    "    idx_train = list(idx_train.cpu())\n",
    "    # Refill the pool: one node from degree_result per node that left it this round.\n",
    "    for i in range(cnt1):\n",
    "        tmp_node = degree_result[degree_flag]\n",
    "        degree_flag += 1\n",
    "        idx_avaliable.append(tmp_node)\n",
    "        idx_avaliable_temp.append(tmp_node)\n",
    "    # Back to NumPy for the next selection round.\n",
    "    labels = np.array(labels.cpu())\n",
    "    if count >= num_coreset:\n",
    "        break\n",
    "\n",
    "\n",
    "# Final hard (max label == 1) / soft split of the selected training nodes.\n",
    "idx_train1 = [node for node in idx_train if np.max(labels[node]) == 1]\n",
    "idx_train2 = [node for node in idx_train if np.max(labels[node]) != 1]\n",
    "idx_train1 = torch.LongTensor(idx_train1).cuda()\n",
    "idx_train2 = torch.LongTensor(idx_train2).cuda()\n",
    "labels = torch.FloatTensor(labels).cuda()\n",
    "idx_train = torch.LongTensor(idx_train).cuda()\n",
    "print('xxxxxxxxxx Evaluation begin xxxxxxxxxx')\n",
    "t_total = time.time()\n",
    "record = {}\n",
    "# `labels` holds soft-label rows here, so labels.max().item() + 1 is not a class\n",
    "# count (it is a float derived from the largest probability). Use num_class, as\n",
    "# update_model_prediction does.\n",
    "model = GCN(nfeat=features_GCN.shape[1],\n",
    "        nhid=hidden_size,\n",
    "        nclass=num_class,\n",
    "        dropout=0.85)\n",
    "model.cuda()\n",
    "optimizer = optim.Adam(model.parameters(),\n",
    "                        lr=0.05, weight_decay=5e-4)\n",
    "for epoch in range(400):\n",
    "    train(epoch,model,record,optimizer)\n",
    "\n",
    "# record maps validation accuracy -> test accuracy; print the ten entries with the\n",
    "# highest validation accuracy.\n",
    "bit_list = sorted(record.keys(), reverse=True)\n",
    "for key in bit_list[:10]:\n",
    "    value = record[key]\n",
    "    print(key,value)\n",
    "print('xxxxxxxxxx Evaluation end xxxxxxxxxx')"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
