{"metadata":{"colab":{"provenance":[]},"gpuClass":"standard","kernelspec":{"name":"python3","display_name":"Python 3","language":"python"},"language_info":{"name":"python","version":"3.10.13","mimetype":"text/x-python","codemirror_mode":{"name":"ipython","version":3},"pygments_lexer":"ipython3","nbconvert_exporter":"python","file_extension":".py"},"vscode":{"interpreter":{"hash":"00ad5f1807eee938f7727b558c9158a01118eae9a3a444b82c1137c2e4c2794d"}},"kaggle":{"accelerator":"none","dataSources":[],"isInternetEnabled":true,"language":"python","sourceType":"notebook","isGpuEnabled":false}},"nbformat_minor":4,"nbformat":4,"cells":[{"cell_type":"code","source":"#@title Imports\n%reset -f ","metadata":{"id":"kjNxUX51Lh0k","trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"import pandas as pd","metadata":{"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"import pylab\nimport scipy.io","metadata":{"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"\n\nimport numpy as np\nfrom itertools import product as cartesian_prod\n\nimport matplotlib.pyplot as plt\n\n\nfrom copy import deepcopy\n\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nimport torch.optim as optim\nfrom tqdm import tqdm\nimport os\nimport argparse\nimport sys\n\nfrom sklearn.svm import SVC\nnp.set_printoptions(precision=2)\n\n\ndef sigmoid(u):\n    u = np.maximum(u,-100)\n    u = np.minimum(u,100)\n    return 1/(1+np.exp(-u))\n","metadata":{"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"from sklearn.linear_model import LogisticRegression\nfrom sklearn.neighbors import KNeighborsClassifier\nfrom sklearn.metrics import pairwise_distances","metadata":{"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"class Args:\n    def __init__(self):\n        self.numlayer=4\n        self.numnodes=10\n        self.beta=5.\n        self.lr=1.\n        ","metadata":{"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')\nprint(device)","metadata":{"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":"**Variable parameters**","metadata":{"id":"HDx4xoSOFzR2"}},{"cell_type":"code","source":"#@title Synthetic data\ndef set_npseed(seed):\n    np.random.seed(seed)\n\n\ndef set_torchseed(seed):\n    torch.manual_seed(seed)\n    torch.cuda.manual_seed(seed)\n    torch.cuda.manual_seed_all(seed)\n    torch.backends.cudnn.deterministic = True\n    torch.backends.cudnn.benchmark = False\n\n\n#classification data\n\ndef data_gen_decision_tree(num_data=1000, dim=2, seed=0, w_list=None, b_list=None,vals=None, num_levels=2):        \n    set_npseed(seed=seed)\n\n    # Construct a complete decision tree with 2**num_levels-1 internal nodes,\n    # e.g. num_levels=2 means there are 3 internal nodes.\n    # w_list, b_list is a list of size equal to num_internal_nodes\n    # vals is a list of size equal to num_leaf_nodes, with values +1 or 0\n    num_internal_nodes = 2**num_levels - 1\n    num_leaf_nodes = 2**num_levels\n    stats = np.zeros(num_internal_nodes+num_leaf_nodes) #stores the num of datapoints at each node so at 0(root) all data points will be present\n\n    if vals is None: #when val i.e., labels are not provided make the labels dynamically\n        vals = np.arange(0,num_internal_nodes+num_leaf_nodes,1,dtype=np.int32)%2 #assign 0 or 1 label to the node based on whether its numbering is even or odd\n        vals[:num_internal_nodes] = -99 #we put -99 to the internal nodes as only the values of leaf nodes are counted\n\n    if w_list is None: #if the w values of the nodes (hyperplane eqn) are not provided then generate dynamically\n        w_list = np.random.standard_normal((num_internal_nodes, dim))\n        w_list = w_list/np.linalg.norm(w_list, axis=1)[:, None] #unit norm w vects\n        b_list = np.zeros((num_internal_nodes))\n\n    '''\n    np.random.random_sample\n    ========================\n    Return random floats in the half-open interval [0.0, 1.0).\n\n    Results are from the \"continuous uniform\" distribution over the\n    stated interval.  To sample :math:`Unif[a, b), b > a` multiply\n    the output of `random_sample` by `(b-a)` and add `a`::\n\n        (b - a) * random_sample() + a\n    '''\n\n#     data_x = np.random.random_sample((num_data, dim))*2 - 1. #generate the datas in range -1 to +1\n#     relevant_stats = data_x @ w_list.T + b_list #stores the x.wT+b value of each nodes for all data points(num_data x num_nodes) to check if > 0 i.e will follow right sub tree route or <0 and will follow left sub tree route\n#     curr_index = np.zeros(shape=(num_data), dtype=int) #stores the curr index for each data point from root to leaf. So initially a datapoint starts from root but then it can go to right or left if it goes to right its curr index will become 2 from 0 else 1 from 0 then in next iteration from say 2 it goes to right then it will become 6\n\n    data_x = np.random.standard_normal((num_data, dim))\n    data_x /= np.sqrt(np.sum(data_x**2, axis=1, keepdims=True))\n    relevant_stats = data_x @ w_list.T + b_list\n    curr_index = np.zeros(shape=(num_data), dtype=int)\n    \n    for level in range(num_levels):\n        nodes_curr_level=list(range(2**level - 1,2**(level+1)-1  ))\n        for el in nodes_curr_level:\n#             b_list[el]=-1*np.median(relevant_stats[curr_index==el,el])\n            relevant_stats[:,el] += b_list[el]\n        decision_variable = np.choose(curr_index, relevant_stats.T) #based on the curr index will choose the corresponding node value of the datapoint\n\n        # Go down and right if wx+b>0 down and left otherwise.\n        # i.e. 0 -> 1 if w[0]x+b[0]<0 and 0->2 otherwise\n        curr_index = (curr_index+1)*2 - (1-(decision_variable > 0)) #update curr index based on the desc_variable\n        \n\n    bound_dist = np.min(np.abs(relevant_stats), axis=1) #finds the abs value of the minm node value of a datapoint. If some node value of a datapoint is 0 then that data point exactly passes through a hyperplane and we remove all such datapoints\n    thres = threshold\n    labels = vals[curr_index] #finally labels for each datapoint is assigned after traversing the whole tree\n\n    data_x_pruned = data_x[bound_dist>thres] #to distingush the hyperplanes seperately for 0 1 labels (classification)\n    #removes all the datapoints that passes through a node hyperplane\n    labels_pruned = labels[bound_dist>thres]\n    relevant_stats = np.sign(data_x_pruned @ w_list.T + b_list) #storing only +1 or -1 for a particular node if it is active or not\n    nodes_active = np.zeros((len(data_x_pruned),  num_internal_nodes+num_leaf_nodes), dtype=np.int32) #stores node actv or not for a data\n\n    for node in range(num_internal_nodes+num_leaf_nodes):\n        if node==0:\n            stats[node]=len(relevant_stats) #for root node all datapoints are present\n            nodes_active[:,0]=1 #root node all data points active status is +1\n            continue\n        parent = (node-1)//2\n        nodes_active[:,node]=nodes_active[:,parent]\n        right_child = node-(parent*2)-1 # 0 means left, 1 means right 1 has children 3,4\n        #finds if it is a right child or left of the parent\n        if right_child==1:\n            nodes_active[:,node] *= relevant_stats[:,parent]>0 #if parent node val was >0 then this right child of parent is active\n        if right_child==0:\n            nodes_active[:,node] *= relevant_stats[:,parent]<0 #else left is active\n        stats = nodes_active.sum(axis=0) #updates the status i.e., no of datapoints active in that node (root has all active then gradually divided in left right)\n    return ((data_x_pruned, labels_pruned), (w_list, b_list, vals), stats)","metadata":{"id":"2iXNxcu4L6kT","trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"# print(train_data.shape)\n# print(train_data_labels.shape)\n\n# print(test_data.shape)\n# print(test_data_labels.shape)\n","metadata":{"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"","metadata":{"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"class DLGN_FC(nn.Module):\n    def __init__(self, input_dim=None, output_dim=None, num_hidden_nodes=[], beta=30, mode='pwc'):\t\t\n        super(DLGN_FC, self).__init__()\n        self.num_hidden_layers = len(num_hidden_nodes)\n        self.beta=beta  # Soft gating parameter\n        self.mode = mode\n        self.num_nodes=[input_dim]+num_hidden_nodes+[output_dim]\n        self.gating_layers=nn.ModuleList()\n        self.value_layers=nn.Parameter(torch.randn([1]+num_hidden_nodes)/100.) #[1, 12, 12, 12, 12]\n        self.num_layer = len(num_hidden_nodes)\n        self.num_hidden_nodes = num_hidden_nodes\n        for i in range(self.num_hidden_layers+1):\n            if i!=self.num_hidden_layers:\n                temp = nn.Linear(self.num_nodes[0], self.num_nodes[i+1], bias=False)\n                self.gating_layers.append(temp)\n\n    def set_parameters_with_mask(self, to_copy, parameter_masks):\n        # self and to_copy are DLGN_FC objects with same architecture\n        # parameter_masks is compatible with dict(to_copy.named_parameters())\n        for (name, copy_param) in to_copy.named_parameters():\n            copy_param = copy_param.clone().detach()\n            orig_param  = self.state_dict()[name]\n            if name in parameter_masks:\n                param_mask = parameter_masks[name]>0\n                orig_param[param_mask] = copy_param[param_mask]\n            else:\n                orig_param = copy_param.data.detach()\n\n    def return_gating_functions(self):\n        effective_weights = []\n        for i in range(self.num_hidden_layers):\n            curr_weight = self.gating_layers[i].weight.detach().clone()\n            # curr_weight /= torch.norm(curr_weight, dim=1, keepdim=True)\n            effective_weights.append(curr_weight)\n        return effective_weights\n        # effective_weights (and effective biases) is a list of size num_hidden_layers\n\n\n    def forward(self, x):\n        for el in self.parameters():\n            if el.is_cuda:\n                device = torch.device('cuda')\n            else:\n                device = torch.device('cpu')\n        values=[torch.ones(x.shape).to(device)]\n        for i in range(self.num_hidden_layers):\n            fiber = [len(x)]+[1]*self.num_layer\n#             print(\"fiber:\",fiber)\n            fiber[i+1] = self.num_hidden_nodes[i]\n#             print(\"fiber:\",fiber)\n            fiber = tuple(fiber)\n#             print(\"fiber:\",fiber)\n            gate_score = torch.sigmoid( self.beta*(x@self.gating_layers[i].weight.T))#/\n                #   torch.norm(self.gating_layers[i].weight, dim=1, keepdim=True).T) \n#             print(\"gate_score:\",gate_score.shape)\n            gate_score = gate_score.reshape(fiber) \n#             print(\"gate_score:\",gate_score.shape)\n            if i==0:\n                cp = gate_score\n#                 print(\"cp:\",cp.shape)\n            else:\n                cp = cp*gate_score \n#                 print(\"cp:\",cp.shape)\n#             print(\"return:\",torch.sum(cp*self.value_layers, dim=(1,2,3,4)).shape)\n        return torch.sum(cp*self.value_layers, dim=(1,2,3,4))","metadata":{"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"#@title Train DLGN model\ndef train_dlgn (DLGN_obj, train_data_curr,vali_data_curr,test_data_curr,\n                train_labels_curr,test_labels_curr,vali_labels_curr,\n                parameter_mask=dict()):\n    # DLGN_obj is the initial network\n    # parameter_mask is a dictionary compatible with dict(DLGN_obj.named_parameters())\n    # if a key corresponding to a named_parameter is not present it is assumed to be all ones (i.e it will be updated)\n\n    # Assuming that we are on a CUDA machine, this should print a CUDA device:\n\n    # Speed up of a factor of over 40 by using GPU instead of CPU\n    # Final train loss of 0.02 and test acc of 74%\n    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')\n    # device = torch.device('cpu')\n    DLGN_obj.to(device)\n\n    criterion = nn.CrossEntropyLoss()\n\n\n\n\n    optimizer = optim.Adam(DLGN_obj.parameters(), lr=lr)\n\n\n\n    train_data_torch = torch.Tensor(train_data_curr)\n    vali_data_torch = torch.Tensor(vali_data_curr)\n    test_data_torch = torch.Tensor(test_data_curr)\n\n    train_labels_torch = torch.tensor(train_labels_curr, dtype=torch.int64)\n    test_labels_torch = torch.tensor(test_labels_curr, dtype=torch.int64)\n    vali_labels_torch = torch.tensor(vali_labels_curr, dtype=torch.int64)\n\n    num_batches = no_of_batches\n    batch_size = len(train_data_curr)//num_batches\n    losses=[]\n    DLGN_obj_store = []\n    best_vali_error = len(vali_labels_curr)\n\n\n    # print(\"H3\")\n    # print(DLGN_params)\n    debug_models= []\n    train_losses = []\n    tepoch = tqdm(range(saved_epochs[-1]+1))\n    for epoch in tepoch:  # loop over the dataset multiple times\n        if epoch in update_value_epochs:\n            # updating the value pathdim vector by optimising \n\n            train_preds =DLGN_obj(torch.Tensor(train_data_curr).to(device)).reshape((-1,1))\n            criterion = nn.CrossEntropyLoss()\n            outputs = torch.cat((-1*train_preds,train_preds), dim=1)\n            targets = torch.tensor(train_labels_curr, dtype=torch.int64).to(device)\n\n            train_loss = criterion(outputs, targets)\n            print(\"Loss before updating value_net at epoch\", epoch, \" is \", train_loss)\n            print(\"Total path abs value\", torch.abs(DLGN_obj.value_layers.cpu().detach()).sum().numpy())\n\n            ew = DLGN_obj.return_gating_functions()\n            cp_feat1 = sigmoid(beta*np.dot(train_data_curr,ew[0].cpu().T).reshape(-1,num_neuron,1,1,1))\n#             print(\"cp_feat1:\",cp_feat1.shape)\n            cp_feat2 = sigmoid(beta*np.dot(train_data_curr,ew[1].cpu().T).reshape(-1,1,num_neuron,1,1))\n#             print(\"cp_feat2:\",cp_feat2.shape)\n            cp_feat3 = sigmoid(beta*np.dot(train_data_curr,ew[2].cpu().T).reshape(-1,1,1,num_neuron,1))\n#             print(\"cp_feat3:\",cp_feat3.shape)\n            cp_feat4 = sigmoid(beta*np.dot(train_data_curr,ew[3].cpu().T).reshape(-1,1,1,1,num_neuron))\n#             print(\"cp_feat4:\",cp_feat4.shape)\n            cp_feat = cp_feat1 * cp_feat2 * cp_feat3 * cp_feat4\n#             print(\"cp_feat:\",cp_feat.shape)\n            cp_feat_vec = cp_feat.reshape((len(cp_feat),-1))\n#             print(\"cp_feat_vec:\",cp_feat_vec.shape)\n\n            clf = LogisticRegression(C=0.03, fit_intercept=False,max_iter=1000, penalty=\"l1\", solver='liblinear')\n            clf.fit(2*cp_feat_vec, train_labels_curr)\n            value_wts  = clf.decision_function(np.eye(num_neuron**num_layer)).reshape(1,num_neuron,num_neuron,num_neuron,num_neuron)\n#             print(\"value_wts:\",value_wts.shape)\n            A= DLGN_obj.value_layers.detach()\n            A[:] = torch.Tensor(value_wts)\n\n            train_preds =DLGN_obj(torch.Tensor(train_data_curr).to(device)).reshape((-1,1))\n            criterion = nn.CrossEntropyLoss()\n            outputs = torch.cat((-1*train_preds,train_preds), dim=1)\n            targets = torch.tensor(train_labels_curr, dtype=torch.int64).to(device)\n            train_loss = criterion(outputs, targets)\n            print(\"Loss after updating value_net at epoch\", epoch, \" is \", train_loss)\t\t\t\n            print(\"Total path abs value\", torch.abs(DLGN_obj.value_layers.cpu().detach()).sum().numpy())\n            if epoch in saved_epochs:\n                DLGN_obj_copy = deepcopy(DLGN_obj)\n                DLGN_obj_copy.to(torch.device('cpu'))\n                DLGN_obj_store.append(DLGN_obj_copy)\n                train_losses.append(train_loss)\n\n        for batch_start in range(0,len(train_data_curr),batch_size):\n            if (batch_start+batch_size)>len(train_data_curr):\n                break\n            optimizer.zero_grad()\n            inputs = train_data_torch[batch_start:batch_start+batch_size]\n            targets = train_labels_torch[batch_start:batch_start+batch_size].reshape(batch_size)\n            criterion = nn.CrossEntropyLoss()\n            inputs = inputs.to(device)\n            targets = targets.to(device)\n            preds = DLGN_obj(inputs).reshape(-1,1)\n            # preds_clone = preds.detach().clone().cpu().numpy()[:,0]\n            # targets_clone = targets.detach().clone().cpu().numpy()\n            # coeff = (0.5-targets_clone)/(sigmoid(2*preds_clone)-targets_clone)\n            # print(coeff.shape)\n\n            # print(coeff.min())\n            # print(coeff.mean())\n            # print(coeff.max())\n            outputs = torch.cat((-1*preds, preds), dim=1)\n            loss = criterion(outputs, targets)\n            # loss = loss*torch.tensor(coeff, device=device)\t\n            # loss = loss.mean()\t\t\n            loss.backward()\n            for name,param in DLGN_obj.named_parameters():\n                if \"val\" in name:\n                    param.grad *= 0.0\n                if \"gat\" in name:\n                    param.grad *= 1.0\n            optimizer.step()\n\n        train_preds =DLGN_obj(torch.Tensor(train_data_curr).to(device)).reshape(-1,1)\n        criterion = nn.CrossEntropyLoss()\n        outputs = torch.cat((-1*train_preds,train_preds), dim=1)\n        targets = torch.tensor(train_labels_curr, dtype=torch.int64).to(device)\n        train_loss = criterion(outputs, targets)\n        if epoch%5 == 0:\n            print(\"Loss after updating at epoch \", epoch, \" is \", train_loss)\n            test_preds =DLGN_obj(test_data_torch.to(device)).reshape(-1,1)\n            test_preds = test_preds.detach().cpu().numpy()\n            print(\"Test error=\",np.sum(test_labels_curr != (np.sign(test_preds[:,0])+1)//2 ))\n        if train_loss < 0.005:\n            break\n        if np.isnan(train_loss.detach().cpu().numpy()):\n            break\n\n        losses.append(train_loss.cpu().detach().clone().numpy())\n        inputs = vali_data_torch.to(device)\n        targets = vali_labels_torch.to(device)\n        preds =DLGN_obj(inputs).reshape(-1,1)\n        vali_preds = torch.cat((-1*preds, preds), dim=1)\n        vali_preds = torch.argmax(vali_preds, dim=1)\n        vali_error= torch.sum(targets!=vali_preds)\n        if vali_error < best_vali_error:\n            DLGN_obj_return = deepcopy(DLGN_obj)\n            best_vali_error = vali_error\n    plt.figure()\n    plt.title(\"DLGN loss vs epoch\")\n    plt.plot(losses)\n    # \tif not os.path.exists('figures'):\n    # \t\tos.mkdir('figures')\n\n    # \tfilename = 'figures/'+filename_suffix +'.pdf'\n    # \tplt.savefig(filename)\n    DLGN_obj_return.to(torch.device('cpu'))\n    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')\n    # device = torch.device('cpu')\n    return train_losses, DLGN_obj_return, DLGN_obj_store, losses, debug_models","metadata":{"id":"Ncr5k6koMbD_","trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":"**Training a DLGN model**","metadata":{"id":"PpN8Yby7Fllw"}},{"cell_type":"code","source":"args =  Args()\n\nnum_layer = args.numlayer\nnum_neuron = args.numnodes\nbeta = args.beta\nlr=args.lr\n\nsaved_epochs = list(range(0,301,10)) + list(range(301,15301,500))\n# saved_epochs = list(range(0,501,10))\nupdate_value_epochs = list(range(0,15301,100))# \n\n\nno_of_batches=10 #[1,10,100]\nweight_decay=0.0\nnum_hidden_nodes=[num_neuron]*num_layer\n\n# Define dictionaries\nseed=365\nnum_levels=4\nthreshold = 0 #data seperation distance\n\noptimizer_name ='Adam'\nmodep='pwc' \noutput_dim=1\n\n# saved_epochs = list(range(0,num_epoch+1,num_epoch//10))\nweight_decay=0.0\n\ndata_configs = [\n    {\"input_dim\": 20, \"num_data\": 40000},\n    {\"input_dim\": 100, \"num_data\": 60000},\n    {\"input_dim\": 500, \"num_data\": 100000}\n]\n\nfor config in data_configs:\n    input_dim = config[\"input_dim\"]\n    num_data = config[\"num_data\"]\n    \n    print(\"==========input_dim:\",input_dim,\"==============num_data:\",num_data)\n\n    \n    \n    ((data_x, labels), (w_list, b_list, vals), stats) = data_gen_decision_tree(\n                                                dim=input_dim, seed=seed, num_levels=num_levels,\n                                                num_data=num_data)\n    seed_set=seed\n    w_list_old = np.array(w_list)\n    b_list_old = np.array(b_list)\n    print(sum(labels==1))\n    print(sum(labels==0))\n#     print(labels.shape)\n#     print(vals)\n#     print(stats)\n    print(\"Seed= \",seed_set)\n    num_data = len(data_x)\n    num_train= num_data//2\n    num_vali = num_data//4\n    num_test = num_data//4\n    train_data = data_x[:num_train,:]\n    train_data_labels = labels[:num_train]\n\n    vali_data = data_x[num_train:num_train+num_vali,:]\n    vali_data_labels = labels[num_train:num_train+num_vali]\n\n    test_data = data_x[num_train+num_vali :,:]\n    test_data_labels = labels[num_train+num_vali :]    \n\n    print(\"---\" * 30)\n    set_torchseed(41972)\n    # set_torchseed(5612)\n    DLGN_init= DLGN_FC(input_dim=input_dim, output_dim=1, num_hidden_nodes=num_hidden_nodes, beta=beta)\n\n    train_parameter_masks=dict()\n    \n    for name,parameter in DLGN_init.named_parameters():\n        if \"val\" in name:\n            train_parameter_masks[name]=torch.ones_like(parameter)# Updating all value network layers\n        if \"gat\" in name:\n            train_parameter_masks[name]=torch.ones_like(parameter)\n        train_parameter_masks[name].to(device)\n\n\n        \n\n\n\n\n\n\n    set_torchseed(5000)\n    train_losses, DLGN_obj_final, DLGN_obj_store, losses , debug_models= train_dlgn(train_data_curr=train_data,\n                                                vali_data_curr=vali_data,\n                                                test_data_curr=test_data,\n                                                train_labels_curr=train_data_labels,\n                                                vali_labels_curr=vali_data_labels,\n                                                test_labels_curr=test_data_labels,\n                                                DLGN_obj=deepcopy(DLGN_init),\n                                                parameter_mask=train_parameter_masks,\n                                                )\n\n\n    torch.cuda.empty_cache() \n    losses=np.array(losses)\n    \n    \n    device=torch.device('cpu')\n    train_preds =DLGN_obj_final(torch.Tensor(train_data).to(device)).reshape(-1,1)\n    criterion = nn.CrossEntropyLoss()\n    outputs = torch.cat((-1*train_preds,train_preds), dim=1)\n    targets = torch.tensor(train_data_labels, dtype=torch.int64)\n    train_loss = criterion(outputs, targets)\n    train_preds = train_preds.detach().numpy()\n    # filename = 'outputs/'+filename_suffix+'.txt'\n    # original_stdout = sys.stdout\n    Train_error = np.sum(train_data_labels != (np.sign(train_preds[:,0])+1)//2)\n    Num_train_data = len(train_data_labels)\n    print(\"Train error=\",Train_error)\n    print(\"Num_train_data=\",Num_train_data)\n    print(\"Train_acc:\",1-Train_error/Num_train_data)\n    \n    test_preds =DLGN_obj_final(torch.Tensor(test_data)).reshape(-1,1)\n    test_preds = test_preds.detach().numpy()\n    # filename = 'outputs/'+filename_suffix+'.txt'\n    # original_stdout = sys.stdout\n    # with open(filename,'a') as f:\n    #     sys.stdout = f\n    #     print(\"Test error=\",np.sum(test_data_labels != (np.sign(test_preds[:,0])+1)//2 ))\n    #     print(\"Num_test_data=\",len(test_data_labels))\n    #     sys.stdout = original_stdout\n\n    Test_error = np.sum(test_data_labels != (np.sign(test_preds[:,0])+1)//2)\n    Num_test_data = len(test_data_labels)\n    print(\"Test error=\",Test_error)\n    print(\"Num_test_data=\",Num_test_data)\n    print(\"Test_acc:\",1-Test_error/Num_test_data)\n\n# print(DLGN_obj_store[-1].beta)\n    ","metadata":{"trusted":true},"execution_count":null,"outputs":[]}]}