import numpy as np
import math
import torch
from torch import nn
from torch.nn.functional import cross_entropy
from keras_lightning import KLModel, SparseCategoricalAccuracy
import torch_wnn as wnn
import openml
from sklearn.model_selection import train_test_split
import pickle

from difflogic import LogicLayer

from unoptimized.modules.linear import UnoptimizedLinear
from deepshift.modules import LinearShift
from deepshift.modules_q import LinearShiftQ
from deepshift.convert import convert_to_shift, round_shift_weights, count_layer_type

import sys
import random
import signal
from brevitas.nn import QuantIdentity, QuantLinear
from common import CommonWeightQuant, CommonActQuant
from tensor_norm import TensorNorm

import itertools

N = 1000
acc = np.zeros(N)

class Temperature(nn.Module):
    def __init__(self, tau=1):
        super().__init__()
        self.tau = tau

    def forward(self, x):
        return x / self.tau

x_train = np.load('satimage_x_train.npy')
y_train = np.load('satimage_y_train.npy')

x_train, x_val, y_train, y_val = train_test_split(x_train, y_train, train_size=0.9)

num_bits = 24
thresholds = wnn.binarization.distributive_thresholds(x_train, num_bits, individual=True)
x_train_b = wnn.binarization.apply_thresholds(x_train, thresholds).flatten(start_dim=1)
x_val_b = wnn.binarization.apply_thresholds(x_val, thresholds).flatten(start_dim=1)

input_lenght = x_train_b.size(1)
num_output = int(y_train.max() + 1)

model_size = 9.5 * 8 * 1024

tuple_lenght = 4
num_keys = 2
filter_tuple_lenght = 3
dropout = 0.5
num_models = math.floor(model_size / (math.ceil(input_lenght / tuple_lenght)*2**filter_tuple_lenght*num_output))

for i in range(N):

    model = KLModel(nn.Sequential(
        wnn.MultiDoMaFilter2(
            input_lenght=input_lenght,
            tuple_lenght=tuple_lenght, 
            num_keys=num_keys, 
            filter_tuple_lenght=filter_tuple_lenght, 
            num_output=num_output, 
            num_models=num_models,
            dropout=dropout,
    )))

    optimizer = torch.optim.Adam(model.parameters(), 1e-2)

    model.compile(
        optimizer=optimizer,
        scheduler = torch.optim.lr_scheduler.StepLR(optimizer, gamma=0.1, step_size=10),
        loss=cross_entropy,
        metrics={'acc': SparseCategoricalAccuracy()}
    )

    results = model.fit(
        x_train_b, y_train,
        x_val=x_val_b, y_test=y_val,
        epochs=30, 
        batch_size=32
    )

    acc[i] = results['test_acc']

print(f'\n Mean: {acc.mean()}, Std: {acc.std()} \n')
###################################################################################################


    


