The supplementary material contains:
* Architecture variants: VGG-13 and ResNet-10

Architectures 
=============


ResNet-10
---------
ResNet(
  (convLays): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (shortcut): Sequential()
    )
    (4): BasicBlock(
      (conv1): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (shortcut): Sequential(
        (0): Conv2d(64, 128, kernel_size=(1, 1), stride=(2, 2), bias=False)
        (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (5): BasicBlock(
      (conv1): Conv2d(128, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (shortcut): Sequential(
        (0): Conv2d(128, 256, kernel_size=(1, 1), stride=(2, 2), bias=False)
        (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (6): BasicBlock(
      (conv1): Conv2d(256, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (shortcut): Sequential(
        (0): Conv2d(256, 512, kernel_size=(1, 1), stride=(2, 2), bias=False)
        (1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (7): AvgPool2d(kernel_size=4, stride=4, padding=0)
    (8): Flatten(start_dim=1, end_dim=-1)
    (9): Linear(in_features=512, out_features=10, bias=True)
  )
)

VGG-13
------
Classifier(
  (conv1): B2lock2(
    (conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    (bnF): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU()
  )
  (conv2): B2lock2(
    (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bnF): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU()
  )
  (conv2a): B2lock2(
    (conv1): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    (bnF): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU()
  )
  (conv3): B2lock2(
    (conv1): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bnF): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU()
  )
  (conv3a): B2lock2(
    (conv1): Conv2d(128, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    (bnF): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU()
  )
  (conv4): B2lock2(
    (conv1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bnF): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU()
  )
  (conv4a): B2lock2(
    (conv1): Conv2d(256, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    (bnF): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU()
  )
  (conv5): B2lock2(
    (conv1): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bnF): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU()
  )
  (conv5a): B2lock2(
    (conv1): Conv2d(512, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    (bnF): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU()
  )
  (flat): Flatten()
  (pred): Linear(in_features=512, out_features=10, bias=True)
)


Label-noise computation, with cfg["labnoi"] giving the label-noise fraction:
---------------------------------------
		# Flip the labels of a random subset of the training data.
		# The requested fraction is scaled up because roughly 1/num_classes of the
		# permuted labels land on the correct class by chance and remain "clean".
		noi=cfg["labnoi"]*(1+1/cfg["num_classes"]) # we just choose randomly and permute, we expect that about 1/#classes get correctly assigned despite permutation
		cy=otrain_data[1]
		# NOTE(review): np.random.choice samples WITH replacement by default, so
		# `inds` may contain duplicates and the effective noise fraction can end up
		# slightly below `noi` — confirm this is intended.
		inds = np.random.choice(cy.shape[0], max(1, int(cy.shape[0] * noi)))
		labs = cy[inds]  # fancy indexing returns a copy of the selected labels
		np.random.shuffle(labs)  # permute the selected labels in place
		cy[inds] = labs  # write the permuted labels back at the chosen indices
		otrain_data=(otrain_data[0],cy)  # rebuild the (inputs, labels) pair with noisy labels
		

Generating adversarial attacks, with cfg["adv"] selecting the attack:
---------------------------------------
	def getAttack(netCl):
        """Generate adversarial examples for the validation data.

        Builds an advertorch attack chosen by cfg["adv"] (10 = L-inf basic
        iterative method, 30 = PGD), perturbs val_data in batches of 128, and
        accumulates the adversarial inputs in `nx` alongside the original
        (unmodified) labels in `ny`.

        NOTE(review): cfg["adv"] > 9 holds for both 10 and 30, so both attacks
        run in targeted mode with target label (y + 1) % num_classes.
        NOTE(review): `cfg`, `val_data`, `tca`, `nn`, `torch`, `np` come from
        an enclosing scope not shown in this excerpt, and the final `nx, ny`
        are not returned here — presumably the excerpt is truncated.
        """
        nx,ny=[],[]
        cx,cy=[],[]
        import advertorch
        
        # eps=0.1 budget, 10 iterations of step size 0.01, inputs clipped to [0, 1].
        if cfg["adv"] in [10]:  adversary =  advertorch.attacks.LinfBasicIterativeAttack(netCl, loss_fn=nn.CrossEntropyLoss(), eps=0.1,nb_iter=10, eps_iter=0.01, clip_min=0.0, clip_max=1.0,targeted=cfg["adv"]>9)
        if cfg["adv"] in [30]: adversary = advertorch.attacks.PGDAttack(netCl, loss_fn=nn.CrossEntropyLoss(), eps=0.1,     nb_iter=10, eps_iter=0.01, rand_init=True, clip_min=0.0, clip_max=1.0,targeted=cfg["adv"]>9)
        for x,y in val_data:
          with tca.autocast():
            cx.append(x)
            cy.append(y)
            # Attack once 128 samples are buffered (smaller batches in dummy/debug mode).
            if len(cx)==128 or (cfg["dummy"] and len(cx)>8):
                if len(cx)%(16*128)==0: print("   Att",len(nx)*128)
                cx=torch.cat(cx).cuda()
                ocy=torch.cat(cy).cuda().long()
                # Targeted mode: aim each sample at the "next" class; otherwise use true labels.
                cy=(ocy+1)% cfg["num_classes"] if cfg["adv"]>9 else ocy
                advx = adversary.perturb(cx,cy)
                nx.append(advx)
                ny.append(ocy)  # keep the ORIGINAL labels for later evaluation
                cx,cy=[],[]
                if cfg["dummy"]: break
        nx,ny=torch.cat(nx).cpu().numpy().astype(np.float16),torch.cat(ny).cpu().numpy()
		
		
LaSeNN for validation data, giving prediction y_pred,
with wLogT stating the weights of the sample to predict and of its nearest neighbors; e.g., for w_q=0.94 and 3 neighbors it is wLogT = [0.94, 0.02, 0.02, 0.02]

    # LaSeNN inference: combine the network's own output with the (stored)
    # network outputs of the sample's nearest neighbors via the weights wLogT.
    with torch.no_grad():
      for i,(ocx,cpuy) in enumerate(val_data):#range(nval):  # tqdm
        with tca.autocast():
            knnpred = getknn(i) #knnpred = network output of nearest neighbors
            # NOTE(review): `x` is not defined in this loop — presumably should
            # be `ocx` (possibly moved to GPU first); verify against full code.
            opred = netCl(x) #network output
            orgy_pred = opred.argmax(axis=1).cpu().numpy()  # plain network prediction, kept for comparison
            # NOTE(review): `knn` is undefined here — looks like it should be
            # `knnpred` being converted from numpy to a CUDA tensor; confirm.
            knn = torch.from_numpy(knn).cuda()            
            # Stack query output and neighbor outputs along dim 0 ...
            oy_pred=torch.cat([opred,knnpred],dim=0) 
            # ... then take the wLogT-weighted sum over them (weighted vote on logits).
            fpred = torch.sum(wLogT * oy_pred, dim=0).reshape(1, -1)
			y_pred = fpred.argmax(axis=1).cpu().numpy()
            
			


Get nearest neighbors in batch mode, i.e., in a set of training points "points" for query points "val_data":
--------------------------------------
def subkNN(points,val_data,cfg,n0,model,logs,ay):    
    """Batched nearest-neighbor lookup over a set of training points.

    For each batch of 8 query samples from `val_data`, compute distances to
    `points` via `calldist`, optionally augment the queries (horizontal flip,
    and — per the NNaug branch — rotations/crops), deduplicate neighbor
    indices, keep the cfg["nNN"] nearest, and collect the stored network
    outputs `logs` of those neighbors.

    Parameters (as used in the visible body; semantics partly inferred):
      points   : training samples searched for neighbors
      val_data : iterable of (x, y) query batches
      cfg      : config dict; keys read: "met" (cosine vs. L2 metric),
                 "NNaug" (augmentation on/off), "nNN" (#neighbors kept)
      n0, ay   : not used in the visible body
      model    : feature extractor handed to `calldist`
      logs     : per-training-sample outputs indexed by neighbor id

    Returns:
      np.stack(allogs): stacked `logs` entries of the selected neighbors.

    NOTE(review): `calldist`, `gpu`, `op`, `nnCosTo`, `nnL2To`, `rot`, `drl`,
    `drr`, `indrl`, `indrr`, `crop`, `icro`, `dcro` are resolved from an
    enclosing scope not shown in this excerpt.
    """
    # Only `allogs` is ultimately returned; the other accumulators stay unused here.
    alldist,allind,allogs,ally,cx=[],[],[],[],[]
    tot = 0
    # NOTE(review): `p` (metric choice) is assigned but not referenced below —
    # presumably consumed inside `calldist` in the full code.
    p = nnCosTo if cfg["met"] else nnL2To    
    doFlip=cfg["NNaug"] #augment, i.e., flip samples for nearest neighbor, used only for pre-trained networks    
    for i, (x, y) in enumerate(val_data):        
        with torch.no_grad():# and tca.autocast(): #with tca.autocast():
                cx.append(x)
                if len(cx) ==8:  # process queries in micro-batches of 8
                    cx = torch.cat(cx, dim=0)
                    tot += len(cx)
                    cx = cx.cuda()
                    distances, indices = calldist(model, cx, points, cfg, gpu, op)
                    if doFlip:
                        # Second distance pass on horizontally flipped queries.
                        xflip = torch.flip(cx, (3,))
                        distflip, indiflip=calldist(model, xflip, points, cfg, gpu, op)                   

                    for k in range(len(cx)):
                        cind = indices[k]
                        cdist = distances[k]
                        if cfg["NNaug"]:
                            # Pool neighbor candidates from all augmentations.
                            condist=[cdist]
                            conind=[cind]
                            if doFlip:
                                condist.append(distflip[k])
                                conind.append(indiflip[k])
                            if rot:
                                condist.append(drl[k])
                                condist.append(drr[k])
                                conind.append(indrl[k])
                                conind.append(indrr[k])
                            for indcro in range(crop):
                                conind.append(icro[indcro][k])#[k*crop+indcro])
                                condist.append(dcro[indcro][k])#[k*crop+indcro])
                            condist = np.concatenate(condist)
                            conind = np.concatenate(conind)                            
                            sind = np.argsort(condist)  # sort pooled candidates by distance
                            
                            #get unique samples (no duplicates if augment)
							cdist = condist[sind]
							cind = conind[sind]
							unique_elements, uindices = np.unique(cind, return_index=True)
							cind = unique_elements[np.argsort(uindices)[:cfg["nNN"]]]
							cdist=cdist[np.argsort(uindices)[:cfg["nNN"]]]                                                    
                        allogs.append(logs[cind])                        
                    cx = []
    return np.stack(allogs)
