import time
from flcore.clients.clientbase import Client
from torchvision.transforms import transforms
import copy
import torch.nn.functional as F
import torch
import torch.nn as nn
from utils.optim_utils import ESAM,flatten_grads,assign_grads

class clientSMOO(Client):
    """Federated client whose local training combines an ESAM optimizer step
    with a proximal / dual-variable correction step on the flattened weights.

    Attributes initialised here:
        dual_variable: flat tensor used in the linear correction term of
            ``train``. Starts as ``None``; presumably assigned by the server
            before training -- TODO confirm against the caller.
        local_update: flat tensor holding ``final_weights - initial_weights``
            of the most recent ``train`` call (``None`` until then).
    """

    def __init__(self, args, id, train_samples, **kwargs):
        super().__init__(args, id, train_samples, **kwargs)
        self.dual_variable = None
        self.local_update = None

    def train(self):
        """Run ``self.local_epochs`` passes over the local training data.

        For every mini-batch the ESAM optimizer's ``step()`` is invoked, then
        a correction loss built from the flattened parameters is
        back-propagated and the base SGD optimizer takes a step. When all
        epochs are done, the parameter displacement relative to the starting
        weights is stored in ``self.local_update``.
        """
        base_optimizer = torch.optim.SGD(
            self.model.parameters(),
            lr=self.learning_rate,
            weight_decay=1e-5,
            momentum=self.momentum,
        )
        optimizer = ESAM(self.model.parameters(), base_optimizer, rho=self.rho)

        trainloader = self.load_train_data()
        self.model.train()

        # Augmentation applied to every batch except for the "agnews" dataset.
        transform_train = transforms.Compose([
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip(),
        ])

        # Snapshot of the weights at the start of local training; it is the
        # anchor of the proximal term and the reference for local_update.
        with torch.no_grad():
            regular_params = param_to_vector(self.model).detach()

        # dual_variable is None until something outside this class assigns it.
        # Fall back to a zero vector (the linear term then contributes
        # nothing) instead of failing inside torch.dot with a TypeError.
        # The instance attribute itself is left untouched.
        dual_variable = self.dual_variable
        if dual_variable is None:
            dual_variable = torch.zeros_like(regular_params)

        for _ in range(self.local_epochs):
            for x, y in trainloader:
                x = x.to(self.device)
                y = y.to(self.device)
                if self.dataset != "agnews":
                    x = transform_train(x)

                # ESAM reads its inputs from `paras`: batch, labels, loss
                # function and model.
                optimizer.paras = [x, y, self.loss, self.model]
                optimizer.step()

                # Correction step on the flattened weights.
                # NOTE(review): the proximal term uses the plain L2 norm, not
                # its square, despite the beta / 2 coefficient -- confirm
                # this is intended.
                local_params = param_to_vector(self.model)
                proximal_term = self.beta / 2 * torch.norm(
                    local_params - regular_params, 2
                )
                linear_term = self.beta * torch.dot(local_params, dual_variable)
                loss = proximal_term + linear_term

                # NOTE(review): no zero_grad() is issued here, so these
                # gradients are added to whatever optimizer.step() left in
                # .grad -- presumably deliberate; verify against ESAM.
                loss.backward()
                base_optimizer.step()

        # Record how far the local weights moved during this round.
        with torch.no_grad():
            local_params = param_to_vector(self.model).detach()
            self.local_update = local_params - regular_params

def param_to_vector(model):
    """Flatten every parameter of ``model`` into one 1-D tensor.

    Parameters are concatenated in ``model.parameters()`` order. The result
    is a freshly allocated tensor (``torch.cat`` copies its inputs), so it
    does not alias the parameters' storage; outside ``torch.no_grad()`` it
    remains part of the autograd graph, so losses built on it back-propagate
    into the parameters.
    """
    flat_chunks = [weights.reshape(-1) for weights in model.parameters()]
    return torch.cat(flat_chunks)



