import torch
import torch.nn as nn
from mnist.model_mnist import GCNNShell_mnist
from utils.math_tools import RotateTransform
import torchvision.datasets as datasets
import torchvision.transforms as transforms
import tqdm
import math
import numpy as np
import os
import datetime
import random
from utils.model_tools import plot
import argparse

def visualize(model, string_type):
    """Save plots of the first eight filters of every Conv2d layer in ``model``.

    For filter index ``i`` in 0..7 a directory
    ``visualize/aug_mnist_<string_type>/<i>/`` is created if missing. For each
    ``nn.Conv2d`` module found by ``model.named_modules()``, the slice
    ``weight[i][0]`` is passed to ``plot``. The output file is named
    ``channel_<k>``, where ``k`` is the running index of the Conv2d layer (not
    a channel index).

    Args:
        model: module whose Conv2d layers are inspected.
        string_type: label inserted into the output directory name.
    """
    root = 'visualize/aug_mnist_{}/'.format(string_type)
    print("saving to {}".format(root))
    for filter_index in range(8):
        saving_root = root + '{}/'.format(filter_index)
        if not os.path.exists(saving_root):
            os.makedirs(saving_root)
        # Walk the conv layers in registration order; the enumerate index
        # becomes the number used in the output file name.
        conv_layers = (
            module for _, module in model.named_modules()
            if isinstance(module, nn.Conv2d)
        )
        for layer_number, conv in enumerate(conv_layers):
            kernels = conv.weight.data.cpu().numpy()
            target_path = saving_root + 'channel_{}'.format(layer_number)
            plot(kernels[filter_index][0], saving_path=target_path)
    print("done visualizing.")

def similarity(square_matrix):
    """Score how far a square 2-D tensor is from 90-degree rotational symmetry.

    The matrix is cut into four corner quadrants (for an odd side length the
    quadrants overlap on the middle row/column). The top-right, bottom-left
    and bottom-right quadrants are rotated so that they line up with the
    top-left one. The mean squared differences of four quadrant pairs are
    averaged, then divided by the quadrant side length and by the value range
    (max - min) of the matrix.

    Despite the name, this is a distance: lower means more symmetric, and 0
    means the compared quadrants are identical after alignment.

    Args:
        square_matrix: 2-D floating-point tensor. The side length is read from
            its first dimension, so the input is expected to be square.

    Returns:
        A 0-dim tensor with the normalised score. A constant matrix yields 0
        instead of the NaN that the 0 / 0 normalisation would produce.
    """
    side = square_matrix.shape[0]
    # Larger and smaller half; they differ only when the side length is odd.
    half_ceil = side - side // 2
    half_floor = side // 2

    top_left = square_matrix[:half_ceil, :half_ceil]
    top_right = square_matrix[:half_ceil, half_floor:]
    bottom_left = square_matrix[half_floor:, :half_ceil]
    bottom_right = square_matrix[half_floor:, half_floor:]

    # Rotate every quadrant into the orientation of the top-left quadrant.
    top_right = torch.rot90(top_right, 1, [0, 1])
    bottom_left = torch.rot90(bottom_left, 3, [0, 1])
    bottom_right = torch.rot90(bottom_right, 2, [0, 1])

    # Mean of the MSE over the four quadrant pairs.
    mean_squared_diff = (
        torch.mean((top_left - top_right) ** 2)
        + torch.mean((bottom_right - bottom_left) ** 2)
        + torch.mean((top_right - bottom_right) ** 2)
        + torch.mean((bottom_left - top_left) ** 2)
    ) / 4

    value_range = torch.max(square_matrix) - torch.min(square_matrix)
    if value_range == 0:
        # A constant matrix is perfectly symmetric; avoid dividing 0 by 0.
        return torch.zeros_like(mean_squared_diff)
    return mean_squared_diff / (half_ceil * value_range)

def calculate_weight_similarity(model):
    """Accumulate the rotation-symmetry score over all Conv2d kernels of ``model``.

    For each ``nn.Conv2d`` layer, every 2-D kernel ``weight[out][in]`` is
    scored with ``similarity`` and the scores are averaged over that layer's
    ``out * in`` kernels. The per-layer averages are summed, not averaged,
    across layers.

    Args:
        model: module whose Conv2d layers are inspected.

    Returns:
        The accumulated score; the integer 0 when the model has no Conv2d
        layers, otherwise whatever type ``similarity`` yields.
    """
    total_similarity = 0
    conv_layers = (
        module for _, module in model.named_modules()
        if isinstance(module, nn.Conv2d)
    )
    for conv in conv_layers:
        kernels = conv.weight.data.cpu()
        assert kernels.dim() == 4
        out_channels, in_channels = kernels.shape[0], kernels.shape[1]
        kernel_count = out_channels * in_channels
        for out_idx in range(out_channels):
            for in_idx in range(in_channels):
                # Divide each term before adding so the layer contributes its mean.
                total_similarity += similarity(kernels[out_idx][in_idx]) / kernel_count
    return total_similarity

def _mnist_loader(train, batch_size, rot):
    """Build an MNIST DataLoader for the train (shuffled) or test split.

    When ``rot`` is true a ``RandomRotation(180)`` is inserted between
    ``ToTensor`` and ``Normalize``.
    """
    steps = [transforms.ToTensor()]
    if rot:
        steps.append(transforms.RandomRotation(180))
    steps.append(transforms.Normalize((0.1307,), (0.3081,)))
    dataset = datasets.MNIST('data', train=train, transform=transforms.Compose(steps))
    return torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=train)


def train_augmented(total_epoch=100, visual=False, load=False, batch_size=128, starting=0, choice=-1, calculate=False, rot=False):
    """Train, visualize, or score a ``GCNNShell_mnist`` model on MNIST.

    Exactly one mode runs per call:

    * ``visual=True``: load a checkpoint, plot its conv filters, return.
    * ``calculate=True``: load a checkpoint, print its weight similarity,
      return.
    * otherwise: train for ``total_epoch`` epochs, evaluating on the test
      split after each epoch.

    Args:
        total_epoch: number of epochs to train in this call.
        visual: run visualization mode.
        load: load a checkpoint before training.
        batch_size: batch size for both data loaders.
        starting: index of this training round; offsets the epoch number used
            in checkpoint file names by ``starting * total_epoch``.
        choice: epoch number of the checkpoint to load, or -1 for
            ``final.pth``.
        calculate: run weight-similarity mode.
        rot: apply random rotation augmentation (train and test) and use the
            ``rot_CNN`` checkpoint directory instead of ``regular_CNN``.

    Side effects:
        Creates the checkpoint directory; while training, writes
        ``epoch_<n>.pth`` every 25th epoch and ``final.pth`` at the end.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(device, "is the device.")
    model = GCNNShell_mnist().to(device)

    root = 'saved_pth/mnist/rot_CNN' if rot else 'saved_pth/mnist/regular_CNN'
    os.makedirs(root, exist_ok=True)

    # Visualization and similarity modes are only meaningful on trained
    # weights, so both of them load the requested checkpoint as well.
    if load or visual or calculate:
        if choice == -1:
            checkpoint_name = 'final.pth'
            choice = 'final'
        else:
            checkpoint_name = 'epoch_{}.pth'.format(choice)
            choice = str(choice)
        # map_location lets a checkpoint saved on GPU load on a CPU-only host.
        state = torch.load(os.path.join(root, checkpoint_name), map_location=device)
        model.load_state_dict(state)
        print("loaded model {}".format(choice))

    if visual:
        print("visualizing")
        visualize(model, choice)
        return

    if calculate:
        print("calculating weight similarity")
        weight_similarity = calculate_weight_similarity(model)
        print("weight similarity: {}".format(weight_similarity))
        return

    rot_train_loader = _mnist_loader(True, batch_size, rot)
    rot_test_loader = _mnist_loader(False, batch_size, rot)

    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    print("begin training.")
    for epoch in range(total_epoch):
        model.train()
        for data, target in rot_train_loader:
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()

        model.eval()
        test_loss = 0
        correct = 0
        with torch.no_grad():
            for data, target in rot_test_loader:
                data, target = data.to(device), target.to(device)
                output = model(data)
                # criterion returns the batch mean; weight it by the batch
                # size so dividing by the dataset size gives a per-sample mean.
                test_loss += criterion(output, target).item() * target.size(0)
                pred = output.argmax(dim=1, keepdim=True)
                correct += pred.eq(target.view_as(pred)).sum().item()

        test_loss /= len(rot_test_loader.dataset)

        if epoch % 25 == 0:
            weight_similarity = calculate_weight_similarity(model)
            print("weight similarity: {}".format(weight_similarity))
            # "Training Loss" is the loss of the last training batch of the epoch.
            print('\nEpoch: {} \tTraining Loss: {:.3f} \tTest Loss: {:.3f} \tTest Accuracy: {:.2f}%\n'.format(epoch, loss.item(), test_loss, 100. * correct / len(rot_test_loader.dataset)))

            torch.save(model.state_dict(), os.path.join(root, 'epoch_{}.pth'.format(epoch + starting * total_epoch)))
    torch.save(model.state_dict(), os.path.join(root, 'final.pth'))

if __name__ == '__main__':
    # Command-line entry point. Exactly one of --train / --visual / --calculate
    # selects the mode (checked in that order); --rot picks the rotation-
    # augmented data pipeline and checkpoint directory.
    parser = argparse.ArgumentParser()
    parser.add_argument('--train', action="store_true")
    parser.add_argument('--calculate', action="store_true")
    parser.add_argument('--epoch', type=int, default=100, help='number of epochs to train')
    parser.add_argument('--visual', action="store_true")
    parser.add_argument('--load', action="store_true")
    parser.add_argument('--batch_size', type=int, default=128, help='batch size for training')
    parser.add_argument('--rot', action="store_true")
    args = parser.parse_args()

    print("training with rotation augmentation" if args.rot else "training without rotation augmentation")

    if args.train:
        choice = int(input("Enter epoch to load: (-1 for final)")) if args.load else -1
        # Train in rounds of args.epoch epochs until interrupted. Every round
        # after the first resumes from a saved checkpoint.
        resume = args.load
        round_index = 0
        while True:
            train_augmented(args.epoch, False, resume, args.batch_size, starting=round_index, choice=choice, rot=args.rot)
            round_index += 1
            resume = True
    elif args.visual:
        choice = int(input("Enter epoch to load: (-1 for final)"))
        train_augmented(visual=True, choice=choice, rot=args.rot)
    elif args.calculate:
        # Keep prompting so several checkpoints can be scored in one session.
        while True:
            choice = int(input("Enter epoch to load: (-1 for final)"))
            train_augmented(calculate=True, choice=choice, rot=args.rot)

