{"metadata":{"kernelspec":{"language":"python","display_name":"Python 3","name":"python3"},"language_info":{"name":"python","version":"3.10.13","mimetype":"text/x-python","codemirror_mode":{"name":"ipython","version":3},"pygments_lexer":"ipython3","nbconvert_exporter":"python","file_extension":".py"},"kaggle":{"accelerator":"nvidiaTeslaT4","dataSources":[],"dockerImageVersionId":30699,"isInternetEnabled":true,"language":"python","sourceType":"notebook","isGpuEnabled":true}},"nbformat_minor":4,"nbformat":4,"cells":[{"cell_type":"code","source":"#@title Imports\n%reset -f\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nimport torch.optim as optim\nfrom tqdm import tqdm\nimport copy\nimport numpy as np\nimport matplotlib.pyplot as plt\nfrom mpl_toolkits.axes_grid1 import make_axes_locatable\nfrom itertools import product as cartesian_prod\nfrom sklearn.metrics import pairwise_distances\nfrom sklearn.metrics import accuracy_score\nfrom sklearn import tree\nfrom sklearn import cluster, mixture\nimport zipfile\nimport shutil\nimport urllib.request\nimport numpy as np\nimport pandas as pd\nfrom sklearn.preprocessing import StandardScaler, LabelEncoder\nfrom sklearn.metrics import accuracy_score\nfrom sklearn.model_selection import train_test_split, GridSearchCV\nfrom sklearn.linear_model import LogisticRegression\nfrom sklearn.svm import SVC\nfrom sklearn.naive_bayes import GaussianNB\nfrom sklearn.neighbors import KNeighborsClassifier\nfrom sklearn.neural_network import MLPClassifier\nfrom sklearn.tree import DecisionTreeClassifier\nfrom sklearn.ensemble import RandomForestClassifier\nfrom sklearn.neighbors import NearestCentroid\nfrom scipy.io import arff\nfrom sklearn.metrics import make_scorer\nnp.set_printoptions(precision=4)\n\n\n#@title Importing Packages\nimport os\nimport random\nfrom copy import deepcopy\nimport torchvision\nimport torchvision.transforms as transforms","metadata":{"_uuid":"8f2839f25d086af736a60e9eeb907d3b93b6e0e5","_cell_guid":"b1076dfc-b9ad-4769-8c92-a6c4dae69d19","trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')\nprint(device)","metadata":{"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"#@title Synthetic data\ndef set_npseed(seed):\n    np.random.seed(seed)\n\n\ndef set_torchseed(seed):\n    torch.manual_seed(seed)\n    torch.cuda.manual_seed(seed)\n    torch.cuda.manual_seed_all(seed)\n    torch.backends.cudnn.deterministic = True\n    torch.backends.cudnn.benchmark = False\n\n\n#classification data\n\ndef data_gen_decision_tree(num_data=1000, dim=2, seed=0, w_list=None, b_list=None,vals=None, num_levels=2):        \n    set_npseed(seed=seed)\n\n    # Construct a complete decision tree with 2**num_levels-1 internal nodes,\n    # e.g. num_levels=2 means there are 3 internal nodes.\n    # w_list, b_list is a list of size equal to num_internal_nodes\n    # vals is a list of size equal to num_leaf_nodes, with values +1 or 0\n    num_internal_nodes = 2**num_levels - 1\n    num_leaf_nodes = 2**num_levels\n    stats = np.zeros(num_internal_nodes+num_leaf_nodes) #stores the num of datapoints at each node so at 0(root) all data points will be present\n\n    if vals is None: #when val i.e., labels are not provided make the labels dynamically\n        vals = np.arange(0,num_internal_nodes+num_leaf_nodes,1,dtype=np.int32)%2 #assign 0 or 1 label to the node based on whether its numbering is even or odd\n        vals[:num_internal_nodes] = -99 #we put -99 to the internal nodes as only the values of leaf nodes are counted\n\n    if w_list is None: #if the w values of the nodes (hyperplane eqn) are not provided then generate dynamically\n        w_list = np.random.standard_normal((num_internal_nodes, dim))\n        w_list = w_list/np.linalg.norm(w_list, axis=1)[:, None] #unit norm w vects\n        b_list = np.zeros((num_internal_nodes))\n\n    '''\n    np.random.random_sample\n    ========================\n    Return random floats in the half-open interval [0.0, 1.0).\n\n    Results are from the \"continuous uniform\" distribution over the\n    stated interval.  To sample :math:`Unif[a, b), b > a` multiply\n    the output of `random_sample` by `(b-a)` and add `a`::\n\n        (b - a) * random_sample() + a\n    '''\n\n#     data_x = np.random.random_sample((num_data, dim))*2 - 1. #generate the datas in range -1 to +1\n#     relevant_stats = data_x @ w_list.T + b_list #stores the x.wT+b value of each nodes for all data points(num_data x num_nodes) to check if > 0 i.e will follow right sub tree route or <0 and will follow left sub tree route\n#     curr_index = np.zeros(shape=(num_data), dtype=int) #stores the curr index for each data point from root to leaf. So initially a datapoint starts from root but then it can go to right or left if it goes to right its curr index will become 2 from 0 else 1 from 0 then in next iteration from say 2 it goes to right then it will become 6\n\n    data_x = np.random.standard_normal((num_data, dim))\n    data_x /= np.sqrt(np.sum(data_x**2, axis=1, keepdims=True))\n    relevant_stats = data_x @ w_list.T + b_list\n    curr_index = np.zeros(shape=(num_data), dtype=int)\n    \n    for level in range(num_levels):\n        nodes_curr_level=list(range(2**level - 1,2**(level+1)-1  ))\n        for el in nodes_curr_level:\n#             b_list[el]=-1*np.median(relevant_stats[curr_index==el,el])\n            relevant_stats[:,el] += b_list[el]\n        decision_variable = np.choose(curr_index, relevant_stats.T) #based on the curr index will choose the corresponding node value of the datapoint\n\n        # Go down and right if wx+b>0 down and left otherwise.\n        # i.e. 0 -> 1 if w[0]x+b[0]<0 and 0->2 otherwise\n        curr_index = (curr_index+1)*2 - (1-(decision_variable > 0)) #update curr index based on the desc_variable\n        \n\n    bound_dist = np.min(np.abs(relevant_stats), axis=1) #finds the abs value of the minm node value of a datapoint. If some node value of a datapoint is 0 then that data point exactly passes through a hyperplane and we remove all such datapoints\n    thres = threshold\n    labels = vals[curr_index] #finally labels for each datapoint is assigned after traversing the whole tree\n\n    data_x_pruned = data_x[bound_dist>thres] #to distingush the hyperplanes seperately for 0 1 labels (classification)\n    #removes all the datapoints that passes through a node hyperplane\n    labels_pruned = labels[bound_dist>thres]\n    relevant_stats = np.sign(data_x_pruned @ w_list.T + b_list) #storing only +1 or -1 for a particular node if it is active or not\n    nodes_active = np.zeros((len(data_x_pruned),  num_internal_nodes+num_leaf_nodes), dtype=np.int32) #stores node actv or not for a data\n\n    for node in range(num_internal_nodes+num_leaf_nodes):\n        if node==0:\n            stats[node]=len(relevant_stats) #for root node all datapoints are present\n            nodes_active[:,0]=1 #root node all data points active status is +1\n            continue\n        parent = (node-1)//2\n        nodes_active[:,node]=nodes_active[:,parent]\n        right_child = node-(parent*2)-1 # 0 means left, 1 means right 1 has children 3,4\n        #finds if it is a right child or left of the parent\n        if right_child==1:\n            nodes_active[:,node] *= relevant_stats[:,parent]>0 #if parent node val was >0 then this right child of parent is active\n        if right_child==0:\n            nodes_active[:,node] *= relevant_stats[:,parent]<0 #else left is active\n        stats = nodes_active.sum(axis=0) #updates the status i.e., no of datapoints active in that node (root has all active then gradually divided in left right)\n    return ((data_x_pruned, labels_pruned), (w_list, b_list, vals), stats)","metadata":{"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"class DLGN_FC(nn.Module):\n    def __init__(self, input_dim=None, output_dim=None, num_hidden_nodes=[], beta=30, dlgn_mode='dlgn_sf', mode='pwc'):\n        super(DLGN_FC, self).__init__()\n        self.num_hidden_layers = len(num_hidden_nodes)\n        self.beta=beta  # Soft gating parameter\n        self.dlgn_mode = dlgn_mode\n        self.mode = mode\n        self.num_nodes=[input_dim]+num_hidden_nodes+[output_dim]\n        self.gating_layers=nn.ModuleList()\n        self.value_layers=nn.ModuleList()\n\n        for i in range(self.num_hidden_layers+1):\n            if i!=self.num_hidden_layers:\n                if self.dlgn_mode == 'dlgn_sf':\n                    temp = nn.Linear(self.num_nodes[0], self.num_nodes[i+1], bias=False)\n                else :\n                    temp = nn.Linear(self.num_nodes[i], self.num_nodes[i+1], bias=False)\n                self.gating_layers.append(temp)\n            temp = nn.Linear(self.num_nodes[i], self.num_nodes[i+1], bias=False)\n            self.value_layers.append(temp)\n    \n    def set_parameters_with_mask(self, to_copy, parameter_masks):\n        # self and to_copy are DLGN_FC objects with same architecture\n        # parameter_masks is compatible with dict(to_copy.named_parameters())\n        for (name, copy_param) in to_copy.named_parameters():\n            copy_param = copy_param.clone().detach()\n            orig_param  = self.state_dict()[name]\n            if name in parameter_masks:\n                param_mask = parameter_masks[name]>0\n                orig_param[param_mask] = copy_param[param_mask]\n            else:\n                orig_param = copy_param.data.detach()\n\n    def return_gating_functions(self):\n        effective_weights = []\n        for i in range(self.num_hidden_layers):\n            curr_weight = self.gating_layers[i].weight.detach().clone()\n#             curr_weight /= torch.norm(curr_weight, dim=1, keepdim=True)\n            if self.dlgn_mode=='dlgn_sf':\n                effective_weights.append(curr_weight)\n            else:\n                if i==0:\n                    effective_weights.append(curr_weight)\n                else:\n                    effective_weights.append(torch.matmul(curr_weight,effective_weights[-1]))\n        return effective_weights\n        # effective_weights (and effective biases) is a list of size num_hidden_layers\n\n\n    def forward(self, x):\n        gate_scores=[x]\n\n        for el in self.parameters():\n            if el.is_cuda:\n                device = torch.device('cuda:0')\n                # device = torch.device('cpu')\n            else:\n                device = torch.device('cpu')\n        if self.mode=='pwc':\n            values=[torch.ones(x.shape).to(device)]\n        else:\n            values=[x.to(device)]\n\n        for i in range(self.num_hidden_layers):\n            if self.dlgn_mode=='dlgn_sf':\n                gate_scores.append( (x@self.gating_layers[i].weight.T) )\n            else:\n                gate_scores.append(self.gating_layers[i].to(device)(gate_scores[-1].to(device)))\n            curr_gate_on_off = torch.sigmoid(self.beta * gate_scores[-1])\n            values.append(self.value_layers[i](values[-1])*curr_gate_on_off)\n        values.append(self.value_layers[self.num_hidden_layers](values[-1]))\n        # Values is a list of size 1+num_hidden_layers+1\n        #gate_scores is a list of size 1+num_hidden_layers\n        return values,gate_scores","metadata":{"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"#@title Train DLGN model\ndef train_dlgn (DLGN_obj, train_data_curr,vali_data_curr,test_data_curr,\n\t\t\t\ttrain_labels_curr,test_labels_curr,vali_labels_curr,num_epoch=1,\n\t\t\t\tparameter_mask=dict()):\n    # DLGN_obj is the initial network\n    # parameter_mask is a dictionary compatible with dict(DLGN_obj.named_parameters())\n    # if a key corresponding to a named_parameter is not present it is assumed to be all ones (i.e it will be updated)\n\n    # Assuming that we are on a CUDA machine, this should print a CUDA device:\n    # device='cpu'\n    # Speed up of a factor of over 40 by using GPU instead of CPU\n    # Final train loss of 0.02 and test acc of 74%\n\n    print(\"train_data_curr inside train_dlgn:\",train_data_curr.shape)\n    set_torchseed(seed)\n\n    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')\n    # device = torch.device('cpu')\n    DLGN_obj.to(device)\n    criterion = nn.CrossEntropyLoss()\n#     optimizer = optim.SGD(DLGN_obj.parameters(), lr=lr)\n    optimizer = optim.Adam(DLGN_obj.parameters(), lr=lr)\n\n    train_data_torch = torch.Tensor(train_data_curr)\n    vali_data_torch = torch.Tensor(vali_data_curr)\n#     test_data_torch = torch.Tensor(test_data_curr)\n\n    train_labels_torch = torch.tensor(train_labels_curr, dtype=torch.int64)\n#     test_labels_torch = torch.tensor(test_labels_curr, dtype=torch.int64)\n    vali_labels_torch = torch.tensor(vali_labels_curr, dtype=torch.int64)\n\n    num_batches = no_of_batches\n    batch_size = len(train_data_curr)//num_batches\n    losses=[]\n    DLGN_obj_store = []\n    best_vali_error = len(vali_labels_curr)\n    debug_models= []\n    train_losses = []\n    running_loss = 0.7*num_batches # initial random loss = 0.7 \n\n    for epoch in tqdm(range(saved_epochs[-1]+1)):  # loop over the dataset multiple times\n#         if epoch in saved_epochs:\n#             DLGN_obj_copy = deepcopy(DLGN_obj)\n#             DLGN_obj_copy.to(torch.device('cpu'))\n#             DLGN_obj_store.append(DLGN_obj_copy)\n#             train_outputs_values, train_outputs_gate_scores =DLGN_obj(torch.Tensor(train_data_curr).to(device))\n#             train_preds = train_outputs_values[-1]\n#             criterion = nn.CrossEntropyLoss()\n#             outputs = torch.cat((-1*train_preds,train_preds), dim=1)\n#             targets = torch.tensor(train_labels_curr, dtype=torch.int64).to(device)\n#             train_loss = criterion(outputs, targets)\n\n#             train_losses.append(train_loss)\n#             if epoch%100 == 0:\n#                 print(train_loss)\n#             if train_loss < 5e-6:\n#                 break\n#             if np.isnan(train_loss.detach().cpu().numpy()):\n#                 break\n        running_loss = 0.0\n        for batch_start in range(0,len(train_data_curr),batch_size):\n            if (batch_start+batch_size)>len(train_data_curr):\n                break\n            optimizer.zero_grad()\n            inputs = train_data_torch[batch_start:batch_start+batch_size]\n            targets = train_labels_torch[batch_start:batch_start+batch_size].reshape(batch_size)\n            inputs = inputs.to(device)\n            targets = targets.to(device)\n            values,gate_scores = DLGN_obj(inputs)\n            outputs = torch.cat((-1*values[-1], values[-1]), dim=1)\n            loss = criterion(outputs, targets)\t\t\t\n            loss.backward()\n            for name,param in DLGN_obj.named_parameters():\n                parameter_mask[name] = parameter_mask[name].to(device)\n                param.grad *= parameter_mask[name]   \n                # if \"val\" in name and epoch>500:\n                # \tparam.grad *= 0.\n            optimizer.step()\n            running_loss += loss.item()\n\n\n        train_outputs_values, train_outputs_gate_scores =DLGN_obj(torch.Tensor(train_data_curr).to(device))\n        train_preds = train_outputs_values[-1]\n        criterion = nn.CrossEntropyLoss()\n        outputs = torch.cat((-1*train_preds,train_preds), dim=1)\n        targets = torch.tensor(train_labels_curr, dtype=torch.int64).to(device)\n        train_loss = criterion(outputs, targets)\n\n        losses.append(train_loss.cpu().detach().clone().numpy())\n        inputs = vali_data_torch.to(device)\n        targets = vali_labels_torch.to(device)\n        values,gate_scores =DLGN_obj(inputs)\n        vali_preds = torch.cat((-1*values[-1], values[-1]), dim=1)\n        vali_preds = torch.argmax(vali_preds, dim=1)\n        vali_error= torch.sum(targets!=vali_preds)\n        if vali_error < best_vali_error:\n            DLGN_obj_return = deepcopy(DLGN_obj)\n            best_vali_error = vali_error\n#     plt.figure()\n#     plt.title(\"DLGN loss vs epoch\")\n#     plt.plot(losses)\n    # \tif not os.path.exists('figures'):\n    # \t\tos.mkdir('figures')\n\n    # \tfilename = 'figures/'+filename_suffix +'.pdf'\n    # \tplt.savefig(filename)\n    DLGN_obj_return.to(torch.device('cpu'))\n    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')\n    # device = torch.device('cpu')\n    return train_losses, DLGN_obj_return, DLGN_obj_store, losses, debug_models","metadata":{"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":"**DLGN Decision Tree**","metadata":{}},{"cell_type":"code","source":"class Node():\n    def __init__(self, feature_vals=None,feature_bias=None, left=None, right=None, info_gain=None, value=None):\n        ''' constructor ''' \n        \n        # for decision node\n        self.feature_vals = feature_vals\n        self.feature_bias = feature_bias\n        self.left = left\n        self.right = right\n        self.info_gain = info_gain\n        \n        # for leaf node\n        self.value = value\n    #2. Check the weights' (1500x500) 1st index (0th) if it is -ve make the entire row change sign (multiply by -1) and do experiments. So the 4th coordinate datas will now become 1st coordinate,\n    \n    #so will get everything in +ve w not -ve w. clustering of centres will happen for only +ve.\ndef process_array(arr):\n    mask = arr[:, 0] < 0\n    arr[mask] *= -1\n    return arr\n    \nclass DecisionTreeClassifierDT():\n    def __init__(self, min_samples_split=2, max_depth=2):\n        ''' constructor '''\n\n        # initialize the root of the tree \n        self.root = None\n\n        # stopping conditions\n        self.min_samples_split = min_samples_split\n        self.max_depth = max_depth\n\n    def build_tree(self, dataset,dataset_val,eps,min_samples,num_hidden_nodes, curr_depth=0,curr_node=0):\n        ''' recursive function to build the tree ''' \n#         if len(dataset)==0: return;\n\n        X, Y = dataset[:,:-1], dataset[:,-1]\n        num_samples, num_features = np.shape(X)\n        print(\"curr_depth:\",curr_depth)\n        print(\"curr_node:\",curr_node)\n        print(\"num_samples:\",num_samples)\n        print(\"num_features:\",num_features)\n\n\n        # split until stopping conditions are met\n        if num_samples>=self.min_samples_split and curr_depth<=self.max_depth:\n            # find the best split\n            best_split = self.get_best_split(dataset,dataset_val,eps,min_samples,curr_depth,curr_node,num_hidden_nodes)\n            # check if information gain is positive\n            print(\"best_info_gain:\",best_split[\"info_gain\"])\n            if best_split[\"info_gain\"]>0:\n            # recur left\n#             num_hidden_nodes=[num_hidden_nodes[0]//2**(curr_depth),num_hidden_nodes[1]//2**(curr_depth),num_hidden_nodes[2]//2**(curr_depth)]\n                left_subtree = self.build_tree(best_split[\"dataset_left\"],dataset_val,eps,min_samples,num_hidden_nodes, curr_depth+1,2*curr_node+1)\n                # recur right\n                right_subtree = self.build_tree(best_split[\"dataset_right\"],dataset_val,eps,min_samples,num_hidden_nodes, curr_depth+1,2*curr_node+2)\n                # return decision node\n                return Node(best_split[\"feature_vals\"], best_split[\"feature_bias\"],\n                            left_subtree, right_subtree, best_split[\"info_gain\"])\n\n        # compute leaf node\n        leaf_value = self.calculate_leaf_value(Y)\n        print(\"leaf_value:\",leaf_value)\n        # return leaf node\n        return Node(value=leaf_value)\n\n    def get_best_split(self, dataset,dataset_val,eps,min_samples,curr_depth,curr_node,num_hidden_nodes):\n        ''' function to find the best split '''\n\n        # dictionary to store the best split\n        best_split = {}\n        max_info_gain = -float(\"inf\")\n\n        X_train, Y_train = dataset[:,:-1], dataset[:,-1]\n        X_valid,Y_valid = dataset_val[:,:-1],dataset_val[:,-1]\n        print(\"X_train:\",X_train.shape)\n#         train_data_dt, vali_data_dt, train_data_labels_dt, vali_data_labels_dt = train_test_split(X, Y, test_size=.1, random_state=41)\n        set_npseed(seed)\n        set_torchseed(seed)\n        ##DLGN clustering of wts\n\n        set_torchseed(6675)\n        DLGN_init= DLGN_FC(input_dim=input_dim, output_dim=1, num_hidden_nodes=num_hidden_nodes, beta=beta, dlgn_mode=dlgn_mode)\n        train_parameter_masks=dict()\n        for name,parameter in DLGN_init.named_parameters():\n            if \"val\" in name:\n                train_parameter_masks[name]=torch.ones_like(parameter) #*0.001 # Updating all value network layers\n            if \"gat\" in name:\n                train_parameter_masks[name]=torch.ones_like(parameter)\n\n\n                # train_parameter_masks[name][:num_neurons_set] *= 0.\n            train_parameter_masks[name].to(device)\n\n        set_torchseed(5000)\n        train_losses, DLGN_obj_final, DLGN_obj_store, losses , debug_models= train_dlgn(train_data_curr=X_train, vali_data_curr=X_valid,test_data_curr=X_valid, train_labels_curr=Y_train,\n                                                    test_labels_curr=Y_valid,vali_labels_curr=Y_valid,DLGN_obj=deepcopy(DLGN_init),parameter_mask=train_parameter_masks)\n        \n#         print(\"Plotting train_losses\",train_losses)\n#         plt.plot(train_losses)\n#         print(\"Plotting losses\",losses)\n#         plt.plot(losses)\n        torch.cuda.empty_cache() \n\n        w_list = np.concatenate((w_list_old,-w_list_old),axis=0)\n        effective_weights = DLGN_obj_final.return_gating_functions()\n        wts_list=[] #DLGN model effective wts\n        for layer in range(len(effective_weights)):\n#             if layer!=0: #and layer!=1 and layer!=2:\n            wts =  np.array(effective_weights[layer].cpu().data.detach().numpy())\n#             wts /= np.linalg.norm(wts, axis=1)[:,None]\n            wts_list.append(wts)\n        wts_list = np.concatenate(wts_list)\n        \n        ### Sorting based on NORMS of the vectors\n        \n        # Calculate norms of each vector\n        norms = np.linalg.norm(wts_list, axis=1)\n#         print (\"norms:\",norms)\n\n        # Sort indices based on norms in descending order\n        sorted_indices = np.argsort(norms)[::-1]\n        print(\"sorted_indices:\",norms[sorted_indices][:40])\n\n        # Take the best 100 vectors\n        best_100_vectors_descending = wts_list[sorted_indices[:]]\n        # Calculate norms of each vector\n        norms = np.linalg.norm(best_100_vectors_descending, axis=1)\n\n        # Normalize each vector by dividing it by its norm\n        wts_list = best_100_vectors_descending / norms[:, np.newaxis]\n\n        # Now, best_100_vectors contains the top 100 vectors sorted by their norms\n\n#         norms_dt_hplane = np.linalg.norm(w_list, axis=1)\n#         print(\"norms_dt_hplane:\",norms_dt_hplane)\n        \n        \n        wts_list = process_array(wts_list)\n        print(\"wts_list shape:\",wts_list.shape)\n        \n        pd1 = pairwise_distances(w_list,wts_list)\n        print(\"Pairwise Distance of all top k hyperplanes to each labelling func hyperplane \\n \")\n        print(\"pd1:\",pd1)\n\n        min_dis = np.inf\n        eps_best = 0\n        min_samples_best = 0\n        label_max_clus_best=0\n        if curr_node ==0:\n            eps = .2\n            min_samples = 6\n        if curr_node ==1:\n            eps = .3\n            min_samples = 6\n        if curr_node ==2:\n            eps = .3\n            min_samples = 6\n#         if curr_node ==3:\n#             eps = .3\n#             min_samples = 15\n#         if curr_node ==5:\n#             eps = .3\n#             min_samples = 16\n#         if curr_node ==13:\n#             eps = .3\n#             min_samples = 7\n        \n        core_samples, dbscan_labels = cluster.dbscan(wts_list, eps=eps, metric='euclidean', min_samples=min_samples)\n        num_clusters = max(dbscan_labels)+1\n        if num_clusters>0:\n            print(\"eps:\",eps,\"min_samples:\",min_samples)\n            cluster_centres = []\n            cluster_no = []\n            for i in range(num_clusters):\n                cluster_centres.append(wts_list[dbscan_labels==i].mean(axis=0))\n                cluster_no.append(len(wts_list[dbscan_labels==i]))\n            cluster_centres = np.array(cluster_centres)\n            label_max_clus = np.argmax(cluster_no)\n            feature_vals = cluster_centres[label_max_clus].reshape(1,-1)\n            feature_bias = 0\n            \n            print(\"cluster_centres_size:\",len(cluster_centres))\n            print(\"DT data hyperplane shape\",w_list.shape)\n            pd2 = pairwise_distances(w_list,cluster_centres)\n            print(\"Pairwise Distance of all halfspace cluster centres to each labelling func hyperplane \\n \")\n            print(\"pd2:\",pd2)\n            print(\"No of wts in each cluster:\",cluster_no)\n            print(\"cluster_no[label_max_clus]:\",cluster_no[label_max_clus])\n            print(\"label_max_clus:\",label_max_clus)\n            \n            pd3 = pairwise_distances(w_list,feature_vals)\n            print(\"Distance of closest halfspace cluster to each labelling func hyperplane \\n\", pd3.min(axis=1)[:])\n            \n             # get current split\n            dataset_left, dataset_right = self.split(dataset, feature_vals,feature_bias)\n            # check if childs are not null\n            print(\"dataset_left:\",len(dataset_left))\n            print(\"dataset_right:\",len(dataset_right))\n            # check if childs are not null\n            \n            if len(dataset_left)>0 and len(dataset_right)>0:\n                y, left_y, right_y = dataset[:, -1], dataset_left[:, -1], dataset_right[:, -1]\n                print(\"no of 0s in left:\",np.count_nonzero(left_y == 0))\n                print(\"no of 1s in left:\",np.count_nonzero(left_y == 1))\n                print(\"no of 0s in right:\",np.count_nonzero(right_y == 0))\n                print(\"no of 1s in right:\",np.count_nonzero(right_y == 1))\n                # compute information gain\n                curr_info_gain = self.information_gain(y, left_y, right_y, \"gini\")\n                print(\"curr_info_gain:\",curr_info_gain)\n                # update the best split if needed\n\n                best_split[\"feature_vals\"] = feature_vals\n                best_split[\"feature_bias\"] = feature_bias\n                best_split[\"dataset_left\"] = dataset_left\n                best_split[\"dataset_right\"] = dataset_right\n                best_split[\"info_gain\"] = curr_info_gain\n                \n                max_info_gain = curr_info_gain\n                eps_best = eps\n                min_samples_best = min_samples\n            else:\n                best_split[\"feature_vals\"] = 0\n                best_split[\"feature_bias\"] = 0\n                best_split[\"dataset_left\"] = 0\n                best_split[\"dataset_right\"] = 0\n                best_split[\"info_gain\"] = 0\n                    \n        else:\n            # dictionary to store the best split\n            print(\"============LAST LEVEL===========================\")\n           \n           \n#             # Create a logistic regression model\n            model = LogisticRegression()\n            # Check the number of unique classes\n            num_classes_y = len(np.unique(Y_train))\n            if num_classes_y<2:\n                if Y_train[-1]==0:\n                    Y_train[-1]=1\n                else:\n                    Y_train[-1]=0\n            # Fit the model to the data\n            model.fit(X_train, Y_train)\n\n            # Get the model coefficients (weights)\n            weights = model.coef_\n            bias = model.intercept_\n            weights = weights / np.linalg.norm(weights, axis=1)[:, None]\n            print(\"Model Weights:\")\n            print(weights)\n            \n            feature_vals=weights[0]\n            feature_bias=0\n            dataset_left, dataset_right = self.split(dataset, feature_vals,feature_bias)\n            print(\"dataset_left:\",len(dataset_left))\n            print(\"dataset_right:\",len(dataset_right))\n            # check if childs are not null\n            if len(dataset_left)>0 and len(dataset_right)>0:\n                y, left_y, right_y = dataset[:, -1], dataset_left[:, -1], dataset_right[:, -1]\n                print(\"no of 0s in left:\",np.count_nonzero(left_y == 0))\n                print(\"no of 1s in left:\",np.count_nonzero(left_y == 1))\n                print(\"no of 0s in right:\",np.count_nonzero(right_y == 0))\n                print(\"no of 1s in right:\",np.count_nonzero(right_y == 1))\n                # compute information gain\n                curr_info_gain = self.information_gain(y, left_y, right_y, \"gini\")\n                print(\"curr_info_gain:\",curr_info_gain)\n                # update the best split if needed\n                \n                best_split[\"feature_vals\"] = feature_vals\n                best_split[\"feature_bias\"] = feature_bias\n                best_split[\"dataset_left\"] = dataset_left\n                best_split[\"dataset_right\"] = dataset_right\n                best_split[\"info_gain\"] = curr_info_gain\n                max_info_gain = curr_info_gain\n            else:\n                best_split[\"feature_vals\"] = 0\n                best_split[\"feature_bias\"] = 0\n                best_split[\"dataset_left\"] = 0\n                best_split[\"dataset_right\"] = 0\n                best_split[\"info_gain\"] = 0\n        print(\"best_split_info_gain:\",best_split[\"info_gain\"])\n        return best_split\n\n        # return best split\n        return best_split\n\n    def split(self, dataset, feature_vals,feature_bias):\n        ''' function to split the data '''\n        dataset_left = np.array([row for row in dataset if (row[:-1]@feature_vals.T<0)])\n        dataset_right = np.array([row for row in dataset if (row[:-1]@feature_vals.T>=0)])\n        return dataset_left, dataset_right\n\n    def information_gain(self, parent, l_child, r_child, mode=\"entropy\"):\n        ''' function to compute information gain '''\n\n        weight_l = len(l_child) / len(parent)\n        weight_r = len(r_child) / len(parent)\n        if mode==\"gini\":\n            gain = self.gini_index(parent) - (weight_l*self.gini_index(l_child) + weight_r*self.gini_index(r_child))\n        else:\n            gain = self.entropy(parent) - (weight_l*self.entropy(l_child) + weight_r*self.entropy(r_child))\n        return gain\n\n    def entropy(self, y):\n        ''' function to compute entropy '''\n\n        class_labels = np.unique(y)\n        entropy = 0\n        for cls in class_labels:\n            p_cls = len(y[y == cls]) / len(y)\n            entropy += -p_cls * np.log2(p_cls)\n        return entropy\n\n    def gini_index(self, y):\n        ''' function to compute gini index '''\n\n        class_labels = np.unique(y)\n        gini = 0\n        for cls in class_labels:\n            p_cls = len(y[y == cls]) / len(y)\n            gini += p_cls**2\n        return 1 - gini\n\n    def calculate_leaf_value(self, Y):\n        ''' function to compute leaf node '''\n\n        Y = list(Y)\n        return max(Y, key=Y.count)\n\n    def print_tree(self, tree=None, indent=\" \"):\n        ''' function to print the tree '''\n\n        if not tree:\n            tree = self.root\n\n        if tree.value is not None:\n            print(tree.value)\n\n        else:\n            print(\"%sleft:\" % (indent), end=\"\")\n            self.print_tree(tree.left, indent + indent)\n            print(\"%sright:\" % (indent), end=\"\")\n            self.print_tree(tree.right, indent + indent)\n\n    def fit(self, X, Y,X_val,Y_val,eps,min_samples,num_hidden_nodes):\n        ''' function to train the tree '''\n\n        dataset = np.concatenate((X, Y), axis=1)\n        dataset_val = np.concatenate((X_val, Y_val), axis=1)\n        self.root = self.build_tree(dataset,dataset_val,eps,min_samples,num_hidden_nodes)\n\n    def predict(self, X):\n        ''' function to predict new dataset '''\n\n        preditions = [self.make_prediction(x, self.root) for x in X]\n        return preditions\n\n    def make_prediction(self, x, tree):\n        ''' function to predict a single data point '''\n\n        if tree.value!=None: return tree.value\n        feature_vals = tree.feature_vals\n        feature_bias = tree.feature_bias\n        if x@feature_vals.T<0:#+feature_bias<0:\n            return self.make_prediction(x, tree.left)\n        else:\n            return self.make_prediction(x, tree.right)\n","metadata":{"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"# Define dictionaries\nseed=365\nnum_levels=4\nthreshold = 0 #data seperation distance\n\noptimizer_name ='Adam'\nmodep='pwc' \noutput_dim=1\nnum_epoch=256\n# saved_epochs = [0]\n# saved_epochs = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,18,20,22,24,26,28,30,32,64,128,256,512,1024,2048, \n                # 4096, 8192]# , 16384, 32768, 65536, 125000, 250000]\nsaved_epochs = list(range(0,num_epoch+1,num_epoch//10))\nweight_decay=0.0\nno_of_batches=10 #[1,10,100]\n\ndata_configs = [\n    {\"input_dim\": 20, \"num_data\": 40000},\n    {\"input_dim\": 100, \"num_data\": 60000},\n    {\"input_dim\": 500, \"num_data\": 100000}\n]\n\n# Code block to run for each dictionary\nfor config in data_configs:\n    input_dim = config[\"input_dim\"]\n    num_data = config[\"num_data\"]\n\n    \n    \n    ((data_x, labels), (w_list, b_list, vals), stats) = data_gen_decision_tree(\n                                                dim=input_dim, seed=seed, num_levels=num_levels,\n                                                num_data=num_data)\n    seed_set=seed\n    w_list_old = np.array(w_list)\n    b_list_old = np.array(b_list)\n    print(sum(labels==1))\n    print(sum(labels==0))\n#     print(labels.shape)\n#     print(vals)\n#     print(stats)\n    print(\"Seed= \",seed_set)\n    num_data = len(data_x)\n    num_train= num_data//2\n    num_vali = num_data//4\n    num_test = num_data//4\n    train_data = data_x[:num_train,:]\n    train_data_labels = labels[:num_train]\n\n    vali_data = data_x[num_train:num_train+num_vali,:]\n    vali_data_labels = labels[num_train:num_train+num_vali]\n\n    test_data = data_x[num_train+num_vali :,:]\n    test_data_labels = labels[num_train+num_vali :]\n    \n    \n    \n    #DLGN DT code\n    print(\"DLGN Decision Tree =================================================\")\n\n#             num_epoch=256\n    dlgn_mode = 'dlgn'\n    beta=3\n    lr= 0.001\n    X = train_data\n    print(X.shape)\n    Y = train_data_labels.reshape(-1,1)\n    print(Y.shape)\n    X_val = vali_data\n    print(X_val.shape)\n    Y_val = vali_data_labels.reshape(-1,1)\n    print(Y_val.shape)\n\n    eps_list=[0.3]\n    min_samples_list=[5]\n    num_hidden_nodes=[500,500,500]\n    max_depth_list = [3]\n    results_list = []\n    for eps in eps_list:\n        print(\"================= EPS: \",eps,\"=================================\")\n        for min_samples in min_samples_list:\n            print(\"================= min_samples: \",min_samples,\"=================================\")\n            for max_depth in max_depth_list:\n                print(\"================= max_depth: \",max_depth,\"=================================\")\n                classifier = DecisionTreeClassifierDT(min_samples_split=int(0.01*X.shape[0]), max_depth=max_depth)\n                classifier.fit(X,Y,X_val,Y_val,eps,min_samples,num_hidden_nodes)\n\n                Y_pred = classifier.predict(test_data)\n                Test_accuracy = accuracy_score(test_data_labels.reshape(-1,1), Y_pred)\n                print(\"Test Accuracy:\", Test_accuracy)\n\n\n                # Append the result to the list\n                results_list.append({\n                    \"eps\": eps,\n                    \"min_samples\": min_samples,\n                    \"max_depth\": max_depth,\n                    \"Test Accuracy\": round(Test_accuracy, 5)\n                })\n                \n    # Create Pandas DataFrames\n    df_dlgn = pd.DataFrame(results_list)\n\n    # Set pandas display options to show more rows and columns\n    pd.set_option('display.max_rows', None)\n    pd.set_option('display.max_columns', None)\n    print(f\"Running code for input_dim={input_dim}, num_data={num_data}\")\n    # Print the full tables\n    print(\"\\nDLGN MODE: dlgn\\n\")\n    print(df_dlgn)\n\n    #DLGN DT code ends\n    \n\n\n#     print(\"---\" * 30)\n#     # Create lists to store results for each dlgn_mode and beta\n#     results_list_dlgn = []\n#     results_list_dlgn_sf = []\n    \n#     for dlgn_mode in ('dlgn','dlgn_sf'):\n#         print(f\"\\n{'='*30} DLGN MODE: {dlgn_mode} {'='*30}\\n\")\n#         for beta in (3,10,30):\n#             print(f\"\\n{'='*20} BETA: {beta} {'='*20}\\n\")\n#             # Create lists to store results for the current dlgn_mode and beta\n#             results_list = []\n#             for lr in (0.001,0.01,0.02):\n#                 print(\"LEARNING RATE ====================\", lr)\n\n              \n#                 #DLGN DT code\n#                 print(\"DLGN Decision Tree =================================================\")\n\n#     #             num_epoch=256\n#                 X = train_data\n#                 print(X.shape)\n#                 Y = train_data_labels.reshape(-1,1)\n#                 print(Y.shape)\n\n#                 eps=[0.2]\n#                 min_samples=[5]\n#                 num_hidden_nodes=[500,500,500,500]\n#                 classifier = DecisionTreeClassifierDT(min_samples_split=1, max_depth=3)\n#                 classifier.fit(X,Y,eps,min_samples,num_hidden_nodes)\n\n#                 Y_pred = classifier.predict(test_data) \n#                 print(accuracy_score(test_data_labels, Y_pred))\n#                 #DLGN DT code ENDs\n           \n#                 print(\"========================================================================\")\n#                 layer_configs = [\n#                     {\"num_hidden_layers\": 3, \"num_hidden_nodes\": [10,10,10]},\n#                     {\"num_hidden_layers\": 3, \"num_hidden_nodes\": [20,20,20]},\n#                     {\"num_hidden_layers\": 3, \"num_hidden_nodes\": [50,50,50]},\n#                     {\"num_hidden_layers\": 4, \"num_hidden_nodes\": [10,10,10,10]},\n#                     {\"num_hidden_layers\": 4, \"num_hidden_nodes\": [20,20,20,20]},\n#                     {\"num_hidden_layers\": 4, \"num_hidden_nodes\": [50,50,50,50]},\n#                     {\"num_hidden_layers\": 5, \"num_hidden_nodes\": [10,10,10,10,10]},\n#                     {\"num_hidden_layers\": 5, \"num_hidden_nodes\": [20,20,20,20,20]},\n#                     {\"num_hidden_layers\": 5, \"num_hidden_nodes\": [50,50,50,50,50]}\n                    \n                    \n# #                     {\"num_hidden_layers\": 3, \"num_hidden_nodes\": [100,100,100]},\n# #                     {\"num_hidden_layers\": 3, \"num_hidden_nodes\": [500,500,500]},\n# #                     {\"num_hidden_layers\": 4, \"num_hidden_nodes\": [500,500,500,500]},\n# #                     {\"num_hidden_layers\": 4, \"num_hidden_nodes\": [100,100,100,100]},\n# #                     {\"num_hidden_layers\": 5, \"num_hidden_nodes\": [100,100,100,100,100]},\n# #                     {\"num_hidden_layers\": 5, \"num_hidden_nodes\": [250,250,250,250,250]},\n# #                     {\"num_hidden_layers\": 5, \"num_hidden_nodes\": [500,500,500,500,500]},\n# #                     {\"num_hidden_layers\": 6, \"num_hidden_nodes\": [100,100,100,100,100,100]},\n# #                     {\"num_hidden_layers\": 6, \"num_hidden_nodes\": [500,500,500,500,500,500]}\n#                 ]\n#                 for layer_config in layer_configs:\n#                     num_hidden_layers = layer_config[\"num_hidden_layers\"]\n#                     num_hidden_nodes = layer_config[\"num_hidden_nodes\"]\n\n#                     print(f\"Running code for num_hidden_layers={num_hidden_layers}, num_hidden_nodes={num_hidden_nodes}\")\n\n#                     max_no_of_nodes=max(num_hidden_nodes)\n\n#                     set_torchseed(6675)\n#                     DLGN_init= DLGN_FC(input_dim=input_dim, output_dim=1, num_hidden_nodes=num_hidden_nodes, beta=beta, dlgn_mode=dlgn_mode)\n#     #                     for name,parameter in DLGN_init.named_parameters():\n#     #                         print(name)\n#     #                         print(parameter.shape)\n\n#                     train_parameter_masks=dict()\n#                     for name,parameter in DLGN_init.named_parameters():\n#                         if \"val\" in name:\n#                             train_parameter_masks[name]=torch.ones_like(parameter) #*0.001 # Updating all value network layers\n#                         if \"gat\" in name:\n#                             train_parameter_masks[name]=torch.ones_like(parameter)\n\n\n#                             # train_parameter_masks[name][:num_neurons_set] *= 0.\n#                         train_parameter_masks[name].to(device)\n\n#                     set_torchseed(5000)\n#                     train_losses, DLGN_obj_final, DLGN_obj_store, losses , debug_models= train_dlgn(train_data_curr=train_data,\n#                                                                 vali_data_curr=vali_data,\n#                                                                 test_data_curr=test_data,\n#                                                                 train_labels_curr=train_data_labels,\n#                                                                 vali_labels_curr=vali_data_labels,\n#                                                                 test_labels_curr=test_data_labels,\n#                                                                 DLGN_obj=deepcopy(DLGN_init),\n#                                                                 parameter_mask=train_parameter_masks)\n\n\n#                     torch.cuda.empty_cache() \n#                     losses=np.array(losses)\n\n\n#                     test_outputs_values, test_outputs_gate_scores =DLGN_obj_final(torch.Tensor(test_data))\n#                     test_preds = test_outputs_values[-1]\n#                     test_preds = test_preds.detach().numpy()\n#                     Test_error=np.sum(test_data_labels != (np.sign(test_preds[:,0])+1)//2)\n#                     Num_test_data=len(test_data_labels)\n#                     print(\"Test_error=\",Test_error)\n#                     print(\"Num_test_data=\",Num_test_data)\n#                     test_accuracy = 1-Test_error/Num_test_data\n#                     print(\"Test Accuracy=\",test_accuracy)\n#                     # Append the result to the list\n#                     results_list.append({\n#                         \"Beta\": beta,\n#                         \"LR\": lr,\n#                         \"Hidden Layers\": layer_config[\"num_hidden_layers\"],\n#                         \"Hidden Nodes\": str(layer_config[\"num_hidden_nodes\"]),\n#                         \"Test Accuracy\": round(test_accuracy, 5)\n#                     })\n                    \n# # Append the results for the current dlgn_mode and beta to the overall list\n#             if dlgn_mode == 'dlgn':\n#                 results_list_dlgn.extend(results_list)\n#             elif dlgn_mode == 'dlgn_sf':\n#                 results_list_dlgn_sf.extend(results_list)\n                \n#     # Create Pandas DataFrames\n#     df_dlgn = pd.DataFrame(results_list_dlgn)\n#     df_dlgn_sf = pd.DataFrame(results_list_dlgn_sf)\n\n#     # Set pandas display options to show more rows and columns\n#     pd.set_option('display.max_rows', None)\n#     pd.set_option('display.max_columns', None)\n#     print(f\"Running code for input_dim={input_dim}, num_data={num_data}\")\n#     # Print the full tables\n#     print(\"\\nDLGN MODE: dlgn\\n\")\n#     print(df_dlgn)\n\n#     print(\"\\nDLGN MODE: dlgn_sf\\n\")\n#     print(df_dlgn_sf)\n# '''","metadata":{"trusted":true},"execution_count":null,"outputs":[]}]}