#!/usr/bin/env python
# coding: utf-8

# In[ ]:


# Mount Google Drive at /content/gdrive; the checkpoint and cached dataset
# paths used further down live under this mount point (Colab-only import).
from google.colab import drive
drive.mount('/content/gdrive')


# In[ ]:


# Install the extra packages this notebook imports. A bare `pip install ...`
# line only works through IPython's automagic and is a syntax error in the
# exported .py file, so each install is issued as an explicit line magic
# (same form as the matplotlib magic further down).
get_ipython().run_line_magic('pip', 'install PIMS')


# In[ ]:


get_ipython().run_line_magic('pip', 'install umap-learn')


# In[ ]:


get_ipython().run_line_magic('pip', 'install fastcluster')


# In[ ]:


import torch
import numpy as np
from torchvision import datasets
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import matplotlib.pyplot as plt
import torchvision
import torch
from torchvision import transforms
from torchvision.io import read_image

# Default figure look: white axes background, 9x6 inch figures.
# NOTE(review): plt.style.use('bmh') is called in a later cell and may
# override these settings -- confirm which one is meant to win.
plt.rcParams['axes.facecolor'] = 'white'
plt.rcParams['figure.figsize'] = 9, 6


# In[ ]:


# Render matplotlib figures inline in the notebook output.
get_ipython().run_line_magic('matplotlib', 'inline')

# importing relevant libraries
import numpy as np
import pandas as pd
import scipy as sp
import matplotlib.pyplot as plt
import sklearn
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.model_selection import cross_val_predict, StratifiedKFold
from sklearn.metrics import roc_auc_score, average_precision_score, precision_recall_curve, auc#plot_precision_recall_curve
from sklearn.datasets import make_classification
from sklearn.preprocessing import MinMaxScaler
from tqdm import tqdm
from umap import UMAP
from pynndescent import NNDescent
from fastcluster import single
from scipy.cluster.hierarchy import cut_tree, fcluster, dendrogram
from scipy.spatial.distance import squareform
from sklearn.tree import DecisionTreeClassifier, ExtraTreeClassifier
from pims import ImageSequence
from PIL import Image
from scipy.spatial.distance import hamming
import warnings
# Silence every warning for the rest of the session.
warnings.filterwarnings('ignore')

# Apply the 'bmh' matplotlib style sheet to all following plots.
# NOTE(review): a style sheet can reset rcParams that were set in the earlier
# cell (e.g. axes.facecolor) -- confirm the intended final look.
plt.style.use('bmh')


# In[ ]:


# Pick the compute device: CUDA when a usable GPU is present, CPU otherwise.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

print(device)


# In[ ]:


# define the NN architecture
class ConvAutoencoder(nn.Module):
    """Convolutional autoencoder for 3-channel images.

    The encoder is three stride-2 3x3 convolutions (3 -> 16 -> 32 -> 64
    channels), each followed by a ReLU, so every stage halves the spatial
    size. The decoder mirrors it with three stride-2 transposed convolutions
    (64 -> 32 -> 16 -> 3 channels) and ends in a sigmoid, so reconstructions
    lie in [0, 1]. For a 32x32 input the latent code is 64x4x4 and the
    reconstruction is 32x32 again.
    """

    def __init__(self):
        super().__init__()
        self.encoder = nn.Sequential(
            nn.Conv2d(3, 16, kernel_size=3, stride=2, padding=1),
            nn.ReLU(),
            nn.Conv2d(16, 32, kernel_size=3, stride=2, padding=1),
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=3, stride=2, padding=1),
            nn.ReLU(),
        )
        self.decoder = nn.Sequential(
            nn.ConvTranspose2d(64, 32, kernel_size=3, stride=2, padding=1, output_padding=1),
            nn.ReLU(),
            nn.ConvTranspose2d(32, 16, kernel_size=3, stride=2, padding=1, output_padding=1),
            nn.ReLU(),
            # kernel_size=4 with padding=1 doubles the spatial size without
            # needing output_padding.
            nn.ConvTranspose2d(16, 3, kernel_size=4, stride=2, padding=1),
            nn.Sigmoid(),  # Ensures outputs are in the range [0, 1]
        )

    def forward(self, x):
        """Return the reconstruction of ``x`` (encode, then decode)."""
        return self.decoder(self.encoder(x))

    def forward_encoder(self, x):
        """Return only the latent code of ``x`` (encoder output)."""
        return self.encoder(x)

# Training function
def train(model, train_loader, criterion, optimizer, num_epochs=10, online = False):
    """Train ``model`` to reconstruct its own (32x32-resized) input images.

    Args:
        model: module called as ``model(images)``; put into train mode here.
        train_loader: iterable of batches. With ``online=False`` every batch
            must unpack as ``(images, labels)`` and the labels are ignored;
            with ``online=True`` every batch is the image tensor itself.
        criterion: loss called as ``criterion(outputs, images)``.
        optimizer: optimizer already bound to ``model``'s parameters.
        num_epochs: number of passes over ``train_loader``.
        online: selects the batch layout described above.

    Returns:
        None. The mean per-batch loss of each epoch is printed.
    """
    # The resize is identical for every batch, so build it once.
    resize = torchvision.transforms.Resize((32,32))
    model.train()
    for epoch in range(num_epochs):
        running_loss = 0.0
        num_batches = 0
        for batch in train_loader:
            if online:
                images = batch
            else:
                images, _ = batch
            images = resize(images)
            noisy_images = images #+ torch.randn(images.size()) * 0.1  # Adding Gaussian noise
            optimizer.zero_grad()
            outputs = model(noisy_images)
            loss = criterion(outputs, images)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            num_batches += 1
        # Average over the batches actually seen; an empty loader reports nan
        # instead of raising ZeroDivisionError.
        mean_loss = running_loss / num_batches if num_batches else float('nan')
        print(f"Epoch [{epoch + 1}/{num_epochs}], Loss: {mean_loss}")

# Data preprocessing: only convert each image to a tensor (no resizing or
# normalisation at this stage).
transform = torchvision.transforms.Compose([
    transforms.ToTensor(),
])

# Initialize the model, criterion, and optimizer
# (mean-squared reconstruction error, Adam with lr=0.001).
# NOTE(review): the model is never moved to `device`; everything visible in
# this file runs on CPU tensors -- confirm that is intended.
model = ConvAutoencoder()
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)


# In[ ]:


# model_Conv_AE_OOD_Cifar10_Color_train = 'classifier.pt'
# path = "/content/gdrive/My Drive/{model_Conv_AE_OOD_Cifar10_Color_train}"
# model.load_state_dict(torch.load(path))


# In[ ]:


# Load CIFAR-100 data (training split, downloaded into ./data when missing)
train_dataset = torchvision.datasets.CIFAR100(root='./data', train=True, download=True, transform=transform)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=4)

# Training the model: 3 epochs of reconstruction training on that split
train(model, train_loader, criterion, optimizer, num_epochs=3)


# In[ ]:


train_dataset_Cifar100 = torchvision.datasets.CIFAR100(root='./data', train=True, download=True, transform=transform)
train_loader_Cifar100 = torch.utils.data.DataLoader(train_dataset_Cifar100, batch_size = 50000, shuffle=True)

# Encode the training split. This is inference only, so autograd is disabled
# (no graph is kept for the 50000-image batch) and the encoder runs once per
# batch. Only the last batch seen by the loop is kept.
with torch.no_grad():
    for img, labels in train_loader_Cifar100:
        # The noise term is scaled by 0.0, so it does not change the input.
        noisy_img = img + torch.randn(img.size()) * 0.0
        latent_train = model.forward_encoder(noisy_img)
        out = model.decoder(latent_train)  # reconstruction, same as model(noisy_img)

X_train = img.numpy()
latent_train = latent_train.numpy()
y = labels.numpy()
print(X_train.shape)
print(latent_train.shape)


# In[ ]:


# Flatten images and latent codes to one row per sample.
X_train = X_train.reshape(-1,3*32*32)
latent_train = latent_train.reshape(-1,64*4*4)
print(X_train.shape)
print(latent_train.shape)


# In[ ]:


# Mean entry of the feature covariance matrix, in pixel space and in latent
# space (np.cov treats rows as variables, hence the transpose).
print(np.mean(np.cov(X_train.T)))
print(np.mean(np.cov(latent_train.T)))


# In[ ]:


# y = y[:10000]
# latent_train = latent_train[:10000,:]


# In[ ]:


# np.random.shuffle(y)
# print(y)


# # Tree Model

# In[ ]:


# Extra-trees classifier trained on the latent codes with the class labels `y`.
# NOTE(review): no random_state is set here or on the CV splitter, so the
# forest and the reported score differ from run to run.
et = ExtraTreesClassifier(n_estimators=500, min_samples_leaf=100,
                          max_features="sqrt", bootstrap=True, class_weight='balanced', n_jobs=-1)

# et = RandomForestClassifier(n_estimators=500, min_samples_leaf=100,
#                           max_features="sqrt", bootstrap=True, class_weight='balanced', n_jobs=-1)

# validation instance
skf = StratifiedKFold(n_splits=5, shuffle=True)

# getting the model validation predictions (out-of-fold class probabilities)
preds = cross_val_predict(et, latent_train, y, cv=skf, method='predict_proba')

# evaluating the model (multi-class AUROC, one-vs-one averaging)
print('Area under the ROC Curve:', roc_auc_score(y, preds, multi_class='ovo'))


# In[ ]:


# Refit on all training latents; this fitted forest is the one applied to every
# test set below.
et.fit(latent_train,y)


# In[ ]:


# One row per sample, one column per tree: index of the leaf the sample lands in.
leaves_train = et.apply(latent_train)
print(leaves_train.shape)
print(leaves_train)

# Pairwise Hamming distance between the leaf vectors of the first 1000 samples,
# i.e. the fraction of trees in which two samples fall into different leaves.
# Vectorised with pdist instead of a 1000x1000 Python loop.
distances_train = squareform(sp.spatial.distance.pdist(leaves_train[:1000], metric='hamming'))

# Per-sample score: mean distance to the other 999 samples (the diagonal is 0).
score_train = distances_train.sum(axis=0)/999

print(np.mean(score_train))
print(np.cov(score_train))


# In[ ]:





# In[ ]:


# Save the trained autoencoder weights to Google Drive.
# NOTE(review): `path` is a plain string, not an f-string, so the braces are
# kept literally and the file on disk is named
# "{model_Conv_AE_OOD_Cifar100_Color_train_30}" rather than 'classifier.pt'.
# Every load in this file uses the same literal, so save and load still
# round-trip; if the `f` prefix is added it must be added to all of them at once.
model_Conv_AE_OOD_Cifar100_Color_train_30 = 'classifier.pt'
path = "/content/gdrive/My Drive/{model_Conv_AE_OOD_Cifar100_Color_train_30}"
torch.save(model.state_dict(), path)


# In[ ]:


# Load the weights back from the same (literal) path.
model_Conv_AE_OOD_Cifar100_Color_train_30 = 'classifier.pt'
path = "/content/gdrive/My Drive/{model_Conv_AE_OOD_Cifar100_Color_train_30}"
model.load_state_dict(torch.load(path))


# # Testing on ID Data

# In[ ]:


# Number of fine-tuning epochs used for every test set below.
num_epoch = 30


# In[ ]:


# Fine-tune the autoencoder on the CIFAR-100 *test* split (reconstruction loss
# only; the labels are ignored by train()).
test_dataset_Cifar100 = torchvision.datasets.CIFAR100(root='./data', train=False, download=True, transform=transform)
test_loader_Cifar100 = torch.utils.data.DataLoader(test_dataset_Cifar100, batch_size = 64, shuffle=True)

# model = ConvAutoencoder()
# criterion = nn.MSELoss()
# optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

train(model, test_loader_Cifar100, criterion, optimizer, num_epochs=num_epoch)


# In[ ]:


test_dataset_Cifar100 = torchvision.datasets.CIFAR100(root='./data', train=False, download=True, transform=transform)
test_loader_Cifar100 = torch.utils.data.DataLoader(test_dataset_Cifar100, batch_size = 10000, shuffle=True)

# Encode the in-distribution test split. Inference only, so autograd is
# disabled and the encoder runs once per batch. Only the last batch seen by
# the loop is kept.
with torch.no_grad():
    for img, _ in test_loader_Cifar100:
        # The noise term is scaled by 0.0, so it does not change the input.
        noisy_img = img + torch.randn(img.size()) * 0.0
        latent_test = model.forward_encoder(noisy_img)
        out = model.decoder(latent_test)  # reconstruction, same as model(noisy_img)

latent_test = latent_test.numpy()
X_test = img.numpy()
print(X_test.shape)
print(latent_test.shape)
# Flatten to one row per sample.
X_test = X_test.reshape(-1,3*32*32)
latent_test = latent_test.reshape(-1,64*4*4)
print(X_test.shape)
print(latent_test.shape)


# In[ ]:


latent_test_in = latent_test

# One row per sample, one column per tree: index of the leaf the sample lands in.
leaves_test_in = et.apply(latent_test_in)
print(leaves_test_in.shape)
print(leaves_test_in)

# Pairwise Hamming distance between the leaf vectors of the first 1000 samples
# (fraction of trees with differing leaves), vectorised with pdist.
distances_test_in = squareform(sp.spatial.distance.pdist(leaves_test_in[:1000], metric='hamming'))

# Per-sample score: mean distance to the other 999 samples (the diagonal is 0).
score_test_in = distances_test_in.sum(axis=0)/999

print(np.mean(score_test_in))
print(np.cov(score_test_in))


# In[ ]:





# # Testing on OOD Data

# ## SVHN

# In[ ]:


# Reload the checkpoint so the fine-tuning done in the previous section is discarded.
# NOTE(review): `path` is not an f-string; the braces are literal (this matches
# the literal name used when the checkpoint was saved).
model_Conv_AE_OOD_Cifar100_Color_train_30 = 'classifier.pt'
path = "/content/gdrive/My Drive/{model_Conv_AE_OOD_Cifar100_Color_train_30}"
model.load_state_dict(torch.load(path))


# In[ ]:


transform = torchvision.transforms.Compose(
    [torchvision.transforms.Resize((32, 32)),  # Resize images to 32x32, the input size used when training the autoencoder
     torchvision.transforms.ToTensor()])

# The SVHN *test* split is used for this fine-tuning pass, despite the `train_` names.
train_dataset_SVHN = torchvision.datasets.SVHN(root='./data', split='test', download=True, transform=transform)
train_loader_SVHN = torch.utils.data.DataLoader(train_dataset_SVHN, batch_size=64, shuffle=True, num_workers=4)

# # Training the model
# model = ConvAutoencoder()
# criterion = nn.MSELoss()
# optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# NOTE(review): `optimizer` is the Adam instance created at the top of the
# file; its internal state is not reset when the weights are reloaded --
# confirm this is intended.
train(model, train_loader_SVHN, criterion, optimizer, num_epochs=num_epoch)


# In[ ]:


test_dataset_SVHN = torchvision.datasets.SVHN(root='./data', split='test', download=True, transform=transform)
test_loader_SVHN = torch.utils.data.DataLoader(test_dataset_SVHN, batch_size=10000, shuffle=True)

# Encode the SVHN test split. Inference only, so autograd is disabled and the
# encoder runs once per batch. Only the last batch seen by the loop is kept.
with torch.no_grad():
    for img, _ in test_loader_SVHN:
        # The noise term is scaled by 0.0, so it does not change the input.
        noisy_img = img + torch.randn(img.size()) * 0.0
        latent_test_out = model.forward_encoder(noisy_img)
        out = model.decoder(latent_test_out)  # reconstruction, same as model(noisy_img)

latent_test_out_SVHN = latent_test_out.numpy()
latent_test_out_SVHN = latent_test_out_SVHN.reshape(-1,64*4*4)
print(latent_test_out_SVHN.shape)


# In[ ]:


# One row per sample, one column per tree: index of the leaf the sample lands in.
leaves_test_out_SVHN = et.apply(latent_test_out_SVHN)

print(leaves_test_out_SVHN.shape)
print(leaves_test_out_SVHN)

# Pairwise Hamming distance between the leaf vectors of the first 1000 samples
# (fraction of trees with differing leaves), vectorised with pdist.
distances_test_out_SVHN = squareform(sp.spatial.distance.pdist(leaves_test_out_SVHN[:1000], metric='hamming'))

# Per-sample score: mean distance to the other 999 samples (the diagonal is 0).
score_test_out_SVHN = distances_test_out_SVHN.sum(axis=0)/999

print(np.mean(score_test_out_SVHN))
print(np.cov(score_test_out_SVHN))


# In[ ]:





# ## DTD

# In[ ]:


# Reload the checkpoint so the fine-tuning done in the previous section is discarded.
# NOTE(review): `path` is not an f-string; the braces are literal (this matches
# the literal name used when the checkpoint was saved).
model_Conv_AE_OOD_Cifar100_Color_train_30 = 'classifier.pt'
path = "/content/gdrive/My Drive/{model_Conv_AE_OOD_Cifar100_Color_train_30}"
model.load_state_dict(torch.load(path))


# In[ ]:


# fine tuning the AE
transform = torchvision.transforms.Compose(
    [torchvision.transforms.Resize((32, 32)),  # Resize images to 32x32, the input size used when training the autoencoder
     torchvision.transforms.ToTensor()])

# The DTD *test* split is used for this fine-tuning pass, despite the `train_` names.
train_dataset_DTD = torchvision.datasets.DTD(root='./data', split='test', download=True, transform=transform)
train_loader_DTD = torch.utils.data.DataLoader(train_dataset_DTD, batch_size=64, shuffle=True, num_workers=4)

# # Training the model
# model = ConvAutoencoder()
# criterion = nn.MSELoss()
# optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# NOTE(review): `optimizer` is reused across datasets; its internal state is
# not reset when the weights are reloaded -- confirm this is intended.
train(model, train_loader_DTD, criterion, optimizer, num_epochs=num_epoch)


# In[ ]:


test_dataset_DTD = torchvision.datasets.DTD(root='./data', split='test', download=True, transform=transform)
test_loader_DTD = torch.utils.data.DataLoader(test_dataset_DTD, batch_size=10000, shuffle=True)

# Encode the DTD test split. Inference only, so autograd is disabled and the
# encoder runs once per batch. Only the last batch seen by the loop is kept.
with torch.no_grad():
    for img, _ in test_loader_DTD:
        # The noise term is scaled by 0.0, so it does not change the input.
        noisy_img = img + torch.randn(img.size()) * 0.0
        latent_test_out = model.forward_encoder(noisy_img)
        out = model.decoder(latent_test_out)  # reconstruction, same as model(noisy_img)


latent_test_out_DTD = latent_test_out.numpy()
latent_test_out_DTD = latent_test_out_DTD.reshape(-1,64*4*4)
print(latent_test_out_DTD.shape)


# In[ ]:


# One row per sample, one column per tree: index of the leaf the sample lands in.
leaves_test_out_DTD = et.apply(latent_test_out_DTD)

print(leaves_test_out_DTD.shape)
print(leaves_test_out_DTD)

# Pairwise Hamming distance between the leaf vectors of the first 1000 samples
# (fraction of trees with differing leaves), vectorised with pdist.
distances_test_out_DTD = squareform(sp.spatial.distance.pdist(leaves_test_out_DTD[:1000], metric='hamming'))

# Per-sample score: mean distance to the other 999 samples (the diagonal is 0).
score_test_out_DTD = distances_test_out_DTD.sum(axis=0)/999

print(np.mean(score_test_out_DTD))
print(np.cov(score_test_out_DTD))


# ## Places365

# In[ ]:


# Reload the checkpoint so the fine-tuning done in the previous section is discarded.
# NOTE(review): `path` is not an f-string; the braces are literal (this matches
# the literal name used when the checkpoint was saved).
model_Conv_AE_OOD_Cifar100_Color_train_30 = 'classifier.pt'
path = "/content/gdrive/My Drive/{model_Conv_AE_OOD_Cifar100_Color_train_30}"
model.load_state_dict(torch.load(path))


# In[ ]:


# fine tuning the AE
transform = torchvision.transforms.Compose(
    [torchvision.transforms.Resize((32, 32)),  # Resize images to 32x32, the input size used when training the autoencoder
     torchvision.transforms.ToTensor()])

# The Places365 *val* split is used for this fine-tuning pass; download=False,
# so the data must already be present under ./data.
train_dataset_Places365 = torchvision.datasets.Places365(root='./data', split='val', small=True, download=False, transform=transform)
train_loader_Places365 = torch.utils.data.DataLoader(train_dataset_Places365, batch_size=64, shuffle=True, num_workers=4)

# # Training the model
# model = ConvAutoencoder()
# criterion = nn.MSELoss()
# optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# NOTE(review): `optimizer` is reused across datasets; its internal state is
# not reset when the weights are reloaded -- confirm this is intended.
train(model, train_loader_Places365, criterion, optimizer, num_epochs=num_epoch)


# In[ ]:


test_dataset_Places365 = torchvision.datasets.Places365(root='./data', split='val', small=True, download=False, transform=transform)
test_loader_Places365 = torch.utils.data.DataLoader(test_dataset_Places365, batch_size=10000, shuffle=True)

# Encode the Places365 val split. Inference only, so autograd is disabled and
# the encoder runs once per batch. Only the last batch seen by the loop is kept.
with torch.no_grad():
    for img, _ in test_loader_Places365:
        # The noise term is scaled by 0.0, so it does not change the input.
        noisy_img = img + torch.randn(img.size()) * 0.0
        latent_test_out = model.forward_encoder(noisy_img)
        out = model.decoder(latent_test_out)  # reconstruction, same as model(noisy_img)


latent_test_out_Places365 = latent_test_out.numpy()
latent_test_out_Places365 = latent_test_out_Places365.reshape(-1,64*4*4)
print(latent_test_out_Places365.shape)


# In[ ]:


# One row per sample, one column per tree: index of the leaf the sample lands in.
leaves_test_out_Places365 = et.apply(latent_test_out_Places365)

print(leaves_test_out_Places365.shape)
print(leaves_test_out_Places365)

# Pairwise Hamming distance between the leaf vectors of the first 1000 samples
# (fraction of trees with differing leaves), vectorised with pdist.
distances_test_out_Places365 = squareform(sp.spatial.distance.pdist(leaves_test_out_Places365[:1000], metric='hamming'))

# Per-sample score: mean distance to the other 999 samples (the diagonal is 0).
score_test_out_Places365 = distances_test_out_Places365.sum(axis=0)/999

print(np.mean(score_test_out_Places365))
print(np.cov(score_test_out_Places365))


# ## iSUN

# In[ ]:


# Reload the checkpoint so the fine-tuning done in the previous section is discarded.
# NOTE(review): `path` is not an f-string; the braces are literal (this matches
# the literal name used when the checkpoint was saved).
model_Conv_AE_OOD_Cifar100_Color_train_30 = 'classifier.pt'
path = "/content/gdrive/My Drive/{model_Conv_AE_OOD_Cifar100_Color_train_30}"
model.load_state_dict(torch.load(path))


# In[ ]:


# One-off code that built the cached image tensor loaded below:
# images = torch.zeros((8924,3,32,32))

# for i in range(8924):
#     print(i)
#     images[i,:,:,:] = read_image('/content/gdrive/MyDrive/OODdata/iSUN/iSUN_patches/' + str(i) + '.jpeg')

# torch.save(images, '/content/gdrive/MyDrive/images_iSUN.t')
images = torch.load('/content/gdrive/MyDrive/TOOD/datasets/images_iSUN.t')

# Scale by 1/255 -- presumably the cached pixels are in 0..255; verify.
test_dataset_iSUN = images/255
# The loader yields bare image tensors (no labels), hence online=True below.
train_loader_iSUN = torch.utils.data.DataLoader(test_dataset_iSUN, batch_size = 64, shuffle=True)

# # Training the model
# model = ConvAutoencoder()
# criterion = nn.MSELoss()
# optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# NOTE(review): `optimizer` is reused across datasets; its internal state is
# not reset when the weights are reloaded -- confirm this is intended.
train(model, train_loader_iSUN, criterion, optimizer, num_epochs=num_epoch, online=True)


# In[ ]:


test_loader_iSUN = torch.utils.data.DataLoader(test_dataset_iSUN, batch_size = 10000, shuffle=True)

# Built once, and named `resize` so it no longer rebinds the imported
# `transforms` module at global scope.
resize = torchvision.transforms.Resize((32,32))

# Encode the iSUN images. Inference only, so autograd is disabled and the
# encoder runs once per batch. Only the last batch seen by the loop is kept.
with torch.no_grad():
    for img in test_loader_iSUN:
        img = resize(img)
        # The noise term is scaled by 0.0, so it does not change the input.
        noisy_img = img + torch.randn(img.size()) * 0.0
        latent_test_out = model.forward_encoder(noisy_img)
        out = model.decoder(latent_test_out)  # reconstruction, same as model(noisy_img)


latent_test_out_iSUN = latent_test_out.numpy()
print(latent_test_out_iSUN.shape)
latent_test_out_iSUN = latent_test_out_iSUN.reshape(-1,64*4*4)
print(latent_test_out_iSUN.shape)


# In[ ]:


# One row per sample, one column per tree: index of the leaf the sample lands in.
leaves_test_out_iSUN = et.apply(latent_test_out_iSUN)

print(leaves_test_out_iSUN.shape)
print(leaves_test_out_iSUN)

# Pairwise Hamming distance between the leaf vectors of the first 1000 samples
# (fraction of trees with differing leaves), vectorised with pdist.
distances_test_out_iSUN = squareform(sp.spatial.distance.pdist(leaves_test_out_iSUN[:1000], metric='hamming'))

# Per-sample score: mean distance to the other 999 samples (the diagonal is 0).
score_test_out_iSUN = distances_test_out_iSUN.sum(axis=0)/999

print(np.mean(score_test_out_iSUN))
print(np.cov(score_test_out_iSUN))


# In[ ]:





# ## LSUN

# In[ ]:


# Reload the checkpoint so the fine-tuning done in the previous section is discarded.
# NOTE(review): `path` is not an f-string; the braces are literal (this matches
# the literal name used when the checkpoint was saved).
model_Conv_AE_OOD_Cifar100_Color_train_30 = 'classifier.pt'
path = "/content/gdrive/My Drive/{model_Conv_AE_OOD_Cifar100_Color_train_30}"
model.load_state_dict(torch.load(path))


# In[ ]:


# One-off code that built the cached image tensor loaded below:
# images = torch.zeros((10000,3,36,36))

# for i in range(10000):
#     print(i)
#     images[i,:,:,:] = read_image('/content/gdrive/MyDrive/OODdata/LSUN/test/' + str(i) + '.png')

# torch.save(images, '/content/gdrive/MyDrive/images_LSUN.t')
images = torch.load('/content/gdrive/MyDrive/TOOD/datasets/images_LSUN.t')

# Scale by 1/255 -- presumably the cached pixels are in 0..255; verify.
test_dataset_LSUN = images/255
# The loader yields bare image tensors (no labels), hence online=True below.
train_loader_LSUN = torch.utils.data.DataLoader(test_dataset_LSUN, batch_size = 64, shuffle=True)

# # Training the model
# model = ConvAutoencoder()
# criterion = nn.MSELoss()
# optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# NOTE(review): `optimizer` is reused across datasets; its internal state is
# not reset when the weights are reloaded -- confirm this is intended.
train(model, train_loader_LSUN, criterion, optimizer, num_epochs=num_epoch, online=True)


# In[ ]:


test_loader_LSUN = torch.utils.data.DataLoader(test_dataset_LSUN, batch_size = 10000, shuffle=True)

# Built once, and named `resize` so it no longer rebinds the imported
# `transforms` module at global scope.
resize = torchvision.transforms.Resize((32,32))

# Encode the LSUN images. Inference only, so autograd is disabled and the
# encoder runs once per batch. Only the last batch seen by the loop is kept.
with torch.no_grad():
    for img in test_loader_LSUN:
        img = resize(img)
        # The noise term is scaled by 0.0, so it does not change the input.
        noisy_img = img + torch.randn(img.size()) * 0.0
        latent_test_out = model.forward_encoder(noisy_img)
        out = model.decoder(latent_test_out)  # reconstruction, same as model(noisy_img)


latent_test_out_LSUN = latent_test_out.numpy()
print(latent_test_out_LSUN.shape)
latent_test_out_LSUN = latent_test_out_LSUN.reshape(-1,64*4*4)
print(latent_test_out_LSUN.shape)


# In[ ]:


# One row per sample, one column per tree: index of the leaf the sample lands in.
leaves_test_out_LSUN = et.apply(latent_test_out_LSUN)

print(leaves_test_out_LSUN.shape)
print(leaves_test_out_LSUN)

# Pairwise Hamming distance between the leaf vectors of the first 1000 samples
# (fraction of trees with differing leaves), vectorised with pdist.
distances_test_out_LSUN = squareform(sp.spatial.distance.pdist(leaves_test_out_LSUN[:1000], metric='hamming'))

# Per-sample score: mean distance to the other 999 samples (the diagonal is 0).
score_test_out_LSUN = distances_test_out_LSUN.sum(axis=0)/999

print(np.mean(score_test_out_LSUN))
print(np.cov(score_test_out_LSUN))


# ## LSUN-resize

# In[ ]:


# Reload the checkpoint so the fine-tuning done in the previous section is discarded.
# NOTE(review): `path` is not an f-string; the braces are literal (this matches
# the literal name used when the checkpoint was saved).
model_Conv_AE_OOD_Cifar100_Color_train_30 = 'classifier.pt'
path = "/content/gdrive/My Drive/{model_Conv_AE_OOD_Cifar100_Color_train_30}"
model.load_state_dict(torch.load(path))


# In[ ]:


# One-off code that built the cached image tensor loaded below:
# images = torch.zeros((10000,3,32,32))

# for i in range(10000):
#     print(i)
#     images[i,:,:,:] = read_image('/content/gdrive/MyDrive/OODdata/LSUN_resize/test/' + str(i) + '.jpg')

# torch.save(images, '/content/gdrive/MyDrive/images_LSUN_resize.t')
images = torch.load('/content/gdrive/MyDrive/TOOD/datasets/images_LSUN_resize.t')

# Scale by 1/255 -- presumably the cached pixels are in 0..255; verify.
test_dataset_LSUN_resize = images/255
# The loader yields bare image tensors (no labels), hence online=True below.
train_loader_LSUN_resize = torch.utils.data.DataLoader(test_dataset_LSUN_resize, batch_size = 64, shuffle=True)

# # Training the model
# model = ConvAutoencoder()
# criterion = nn.MSELoss()
# optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# NOTE(review): `optimizer` is reused across datasets; its internal state is
# not reset when the weights are reloaded -- confirm this is intended.
train(model, train_loader_LSUN_resize, criterion, optimizer, num_epochs=num_epoch, online=True)


# In[ ]:


test_loader_LSUN_resize = torch.utils.data.DataLoader(test_dataset_LSUN_resize, batch_size = 10000, shuffle=True)

# Built once, and named `resize` so it no longer rebinds the imported
# `transforms` module at global scope.
resize = torchvision.transforms.Resize((32,32))

# Encode the LSUN-resize images. Inference only, so autograd is disabled and
# the encoder runs once per batch. Only the last batch seen by the loop is kept.
with torch.no_grad():
    for img in test_loader_LSUN_resize:
        img = resize(img)
        # The noise term is scaled by 0.0, so it does not change the input.
        noisy_img = img + torch.randn(img.size()) * 0.0
        latent_test_out = model.forward_encoder(noisy_img)
        out = model.decoder(latent_test_out)  # reconstruction, same as model(noisy_img)


latent_test_out_LSUN_resize = latent_test_out.numpy()
print(latent_test_out_LSUN_resize.shape)
latent_test_out_LSUN_resize = latent_test_out_LSUN_resize.reshape(-1,64*4*4)
print(latent_test_out_LSUN_resize.shape)


# In[ ]:


# One row per sample, one column per tree: index of the leaf the sample lands in.
leaves_test_out_LSUN_resize = et.apply(latent_test_out_LSUN_resize)

print(leaves_test_out_LSUN_resize.shape)
print(leaves_test_out_LSUN_resize)

# Pairwise Hamming distance between the leaf vectors of the first 1000 samples
# (fraction of trees with differing leaves), vectorised with pdist.
distances_test_out_LSUN_resize = squareform(sp.spatial.distance.pdist(leaves_test_out_LSUN_resize[:1000], metric='hamming'))

# Per-sample score: mean distance to the other 999 samples (the diagonal is 0).
score_test_out_LSUN_resize = distances_test_out_LSUN_resize.sum(axis=0)/999

print(np.mean(score_test_out_LSUN_resize))
print(np.cov(score_test_out_LSUN_resize))


# In[ ]:





# ## STL10

# In[ ]:


# Reload the checkpoint so the fine-tuning done in the previous section is discarded.
# NOTE(review): `path` is not an f-string; the braces are literal (this matches
# the literal name used when the checkpoint was saved).
model_Conv_AE_OOD_Cifar100_Color_train_30 = 'classifier.pt'
path = "/content/gdrive/My Drive/{model_Conv_AE_OOD_Cifar100_Color_train_30}"
model.load_state_dict(torch.load(path))


# In[ ]:


# fine tuning the AE
transform = torchvision.transforms.Compose(
    [torchvision.transforms.Resize((32, 32)),  # Resize images to 32x32, the input size used when training the autoencoder
     torchvision.transforms.ToTensor()])

# The STL10 *test* split is used for this fine-tuning pass, despite the `train_` names.
train_dataset_STL10 = torchvision.datasets.STL10(root='./data', split='test', download=True, transform=transform)
train_loader_STL10 = torch.utils.data.DataLoader(train_dataset_STL10, batch_size=64, shuffle=True, num_workers=4)

# # Training the model
# model = ConvAutoencoder()
# criterion = nn.MSELoss()
# optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# NOTE(review): `optimizer` is reused across datasets; its internal state is
# not reset when the weights are reloaded -- confirm this is intended.
train(model, train_loader_STL10, criterion, optimizer, num_epochs=num_epoch)


# In[ ]:


test_dataset_STL10 = torchvision.datasets.STL10(root='./data', split='test', download=True, transform=transform)
test_loader_STL10 = torch.utils.data.DataLoader(test_dataset_STL10, batch_size=10000, shuffle=True)

# Encode the STL10 test split. Inference only, so autograd is disabled and the
# encoder runs once per batch. Only the last batch seen by the loop is kept.
with torch.no_grad():
    for img, _ in test_loader_STL10:
        # The noise term is scaled by 0.0, so it does not change the input.
        noisy_img = img + torch.randn(img.size()) * 0.0
        latent_test_out = model.forward_encoder(noisy_img)
        out = model.decoder(latent_test_out)  # reconstruction, same as model(noisy_img)

latent_test_out_STL10 = latent_test_out.numpy()
latent_test_out_STL10 = latent_test_out_STL10.reshape(-1,64*4*4)
print(latent_test_out_STL10.shape)


# In[ ]:


# One row per sample, one column per tree: index of the leaf the sample lands in.
leaves_test_out_STL10 = et.apply(latent_test_out_STL10)

print(leaves_test_out_STL10.shape)
print(leaves_test_out_STL10)

# Pairwise Hamming distance between the leaf vectors of the first 1000 samples
# (fraction of trees with differing leaves), vectorised with pdist.
distances_test_out_STL10 = squareform(sp.spatial.distance.pdist(leaves_test_out_STL10[:1000], metric='hamming'))

# Per-sample score: mean distance to the other 999 samples (the diagonal is 0).
score_test_out_STL10 = distances_test_out_STL10.sum(axis=0)/999

print(np.mean(score_test_out_STL10))
print(np.cov(score_test_out_STL10))


# In[ ]:





# ## Results

# In[ ]:


# Per-sample score arrays keyed by the label shown on the plot; the
# in-distribution set (Cifar100) comes first, followed by the OOD sets.
my_dict = {'Cifar100': score_test_in, 'SVHN': score_test_out_SVHN, 'DTD': score_test_out_DTD,
           'Places365': score_test_out_Places365, 'iSUN': score_test_out_iSUN, 'LSUN': score_test_out_LSUN, 
           'LSUN-resize': score_test_out_LSUN_resize, 'STL10': score_test_out_STL10}

# One box per dataset showing the distribution of its scores.
plt.figure(figsize=(10,6))
plt.boxplot(my_dict.values(), labels=my_dict.keys());
plt.show()


# In[ ]:


# For each OOD dataset, pool the in-distribution scores (first) with that
# dataset's scores (second) into one array.
score_pred_STL10 = np.concatenate([score_test_in, score_test_out_STL10])
score_pred_Places365 = np.concatenate([score_test_in, score_test_out_Places365])
score_pred_iSUN = np.concatenate([score_test_in, score_test_out_iSUN])
score_pred_LSUN = np.concatenate([score_test_in, score_test_out_LSUN])
score_pred_LSUN_resize = np.concatenate([score_test_in, score_test_out_LSUN_resize])
score_pred_SVHN = np.concatenate([score_test_in, score_test_out_SVHN])
score_pred_DTD = np.concatenate([score_test_in, score_test_out_DTD])
# Matching ground truth: 1 for the 1000 in-distribution scores, 0 for the 1000 OOD scores.
score_true = np.concatenate([np.ones(1000), np.zeros(1000)])


# AUROC per OOD dataset, printed in the order
# SVHN, DTD, Places365, iSUN, LSUN, LSUN-resize, STL10.
print(roc_auc_score(score_true, score_pred_SVHN))
print(roc_auc_score(score_true, score_pred_DTD))
print(roc_auc_score(score_true, score_pred_Places365))
print(roc_auc_score(score_true, score_pred_iSUN))
print(roc_auc_score(score_true, score_pred_LSUN))
print(roc_auc_score(score_true, score_pred_LSUN_resize))
print(roc_auc_score(score_true, score_pred_STL10))


# In[ ]:


# Precision-recall curve of each pooled score array against score_true.
precision_STL10, recall_STL10, thresholds_STL10 = precision_recall_curve(score_true, score_pred_STL10)
precision_Places365, recall_Places365, thresholds_Places365 = precision_recall_curve(score_true, score_pred_Places365)
precision_SVHN, recall_SVHN, thresholds_SVHN = precision_recall_curve(score_true, score_pred_SVHN)
precision_DTD, recall_DTD, thresholds_DTD = precision_recall_curve(score_true, score_pred_DTD)
precision_iSUN, recall_iSUN, thresholds_iSUN = precision_recall_curve(score_true, score_pred_iSUN)
precision_LSUN, recall_LSUN, thresholds_LSUN = precision_recall_curve(score_true, score_pred_LSUN)
precision_LSUN_resize, recall_LSUN_resize, thresholds_LSUN_resize = precision_recall_curve(score_true, score_pred_LSUN_resize)

# Area under each precision-recall curve (recall on the x axis).
auc_precision_recall_STL10= auc(recall_STL10, precision_STL10)
auc_precision_recall_Places365 = auc(recall_Places365, precision_Places365)
auc_precision_recall_SVHN = auc(recall_SVHN, precision_SVHN)
auc_precision_recall_DTD = auc(recall_DTD, precision_DTD)
auc_precision_recall_iSUN = auc(recall_iSUN, precision_iSUN)
auc_precision_recall_LSUN = auc(recall_LSUN, precision_LSUN)
auc_precision_recall_LSUN_resize = auc(recall_LSUN_resize, precision_LSUN_resize)

# Printed in the order SVHN, DTD, Places365, iSUN, LSUN, LSUN-resize, STL10.
print(auc_precision_recall_SVHN)
print(auc_precision_recall_DTD)
print(auc_precision_recall_Places365)
print(auc_precision_recall_iSUN)
print(auc_precision_recall_LSUN)
print(auc_precision_recall_LSUN_resize)
print(auc_precision_recall_STL10)


# In[ ]:


def compute_fpr95(y_true, y_pred_probs, tpr_level=0.95):
    """Return the false-positive rate at the ROC point nearest ``tpr_level``.

    Args:
        y_true: binary ground-truth labels.
        y_pred_probs: scores for the positive class, one per label.
        tpr_level: target true-positive rate; defaults to 0.95, which gives
            the usual "FPR at 95% TPR".

    Returns:
        The FPR of the ROC operating point whose TPR is closest to
        ``tpr_level``. The ROC curve is a step function, so the chosen point's
        TPR may sit slightly below or above the target.
    """
    fpr, tpr, _ = sklearn.metrics.roc_curve(y_true, y_pred_probs)
    nearest = np.abs(tpr - tpr_level).argmin()
    return fpr[nearest]

# FPR at 95% TPR for each OOD dataset, using the pooled score arrays and the
# shared ground-truth vector score_true.
fpr95_score_SVHN = compute_fpr95(score_true, score_pred_SVHN)
fpr95_score_DTD = compute_fpr95(score_true, score_pred_DTD)
fpr95_score_Places365 = compute_fpr95(score_true, score_pred_Places365)
fpr95_score_iSUN = compute_fpr95(score_true, score_pred_iSUN)
fpr95_score_LSUN = compute_fpr95(score_true, score_pred_LSUN)
fpr95_score_LSUN_resize = compute_fpr95(score_true, score_pred_LSUN_resize)
fpr95_score_STL10 = compute_fpr95(score_true, score_pred_STL10)


# Printed in the order SVHN, DTD, Places365, iSUN, LSUN, LSUN-resize, STL10.
print(fpr95_score_SVHN)
print(fpr95_score_DTD)
print(fpr95_score_Places365)
print(fpr95_score_iSUN)
print(fpr95_score_LSUN)
print(fpr95_score_LSUN_resize)
print(fpr95_score_STL10)


# In[ ]:





# In[ ]:


# Mean and standard deviation of the scores of each dataset. np.cov of a 1-D
# array is its sample variance, so the square root is the sample std.
print(np.mean(score_test_in), np.sqrt(np.cov(score_test_in)))
print(np.mean(score_test_out_SVHN), np.sqrt(np.cov(score_test_out_SVHN)))
print(np.mean(score_test_out_DTD), np.sqrt(np.cov(score_test_out_DTD)))
print(np.mean(score_test_out_Places365), np.sqrt(np.cov(score_test_out_Places365)))
print(np.mean(score_test_out_iSUN), np.sqrt(np.cov(score_test_out_iSUN)))
print(np.mean(score_test_out_LSUN), np.sqrt(np.cov(score_test_out_LSUN)))
print(np.mean(score_test_out_LSUN_resize), np.sqrt(np.cov(score_test_out_LSUN_resize)))
print(np.mean(score_test_out_STL10), np.sqrt(np.cov(score_test_out_STL10)))


# In[ ]:




