import functools
import logging
import torch
import torch.nn as nn
from torch.nn import init
from torch.nn import modules
import torch.distributed as dist

####################
# initialize
####################
from .sr3_modules import esrt, elan, edsr

# Registry of prior encoders: maps the ``opt["prior_type"]`` string used by
# ``define_G`` to the constructor that builds the matching encoder.
prior_model = {
    "esrt": esrt.ESRT,
    "elan": elan.ELAN,
    "edsr": edsr.EDSR,
}

def weights_init_normal(m, std=0.02):
    """Initialize a single module in place from a normal distribution.

    Meant to be passed to ``nn.Module.apply``. The kind of module is decided
    by a substring match on its class name:

    * name contains ``'Conv'`` or ``'Linear'``: weight ~ N(0, std), bias = 0
    * name contains ``'BatchNorm2d'``: weight ~ N(1, std), bias = 0

    Modules that match none of these are left untouched. Parameters that are
    missing or ``None`` (e.g. ``bias=False`` layers, ``BatchNorm2d`` with
    ``affine=False``, or a wrapper class whose name merely contains 'Conv')
    are skipped instead of raising ``AttributeError``.

    Args:
        m: The module to initialize.
        std: Standard deviation of the normal distribution.
    """
    classname = m.__class__.__name__
    # Look the parameters up defensively: a matching class name does not
    # guarantee that the module actually owns a weight or a bias.
    weight = getattr(m, 'weight', None)
    bias = getattr(m, 'bias', None)

    if 'Conv' in classname or 'Linear' in classname:
        if weight is not None:
            init.normal_(weight.data, 0.0, std)
        if bias is not None:
            bias.data.zero_()
    elif 'BatchNorm2d' in classname:
        if weight is not None:
            init.normal_(weight.data, 1.0, std)  # BN also uses norm
        if bias is not None:
            init.constant_(bias.data, 0.0)

def weights_init_kaiming(m, scale=1):
    """Initialize a single module in place with (scaled) Kaiming-normal weights.

    Meant to be passed to ``nn.Module.apply``. The kind of module is decided
    by a substring match on its class name:

    * name contains ``'Conv2d'`` or ``'Linear'``: weight is drawn with
      ``kaiming_normal_(a=0, mode='fan_in')`` and then multiplied by
      ``scale``; bias = 0
    * name contains ``'BatchNorm2d'``: weight = 1, bias = 0

    Note that only ``'Conv2d'`` is matched here, not every class whose name
    contains ``'Conv'``. Modules that match nothing are left untouched.
    Parameters that are missing or ``None`` (e.g. ``bias=False`` layers or
    ``BatchNorm2d`` with ``affine=False``) are skipped instead of raising
    ``AttributeError``.

    Args:
        m: The module to initialize.
        scale: Multiplier applied to the freshly drawn weights.
    """
    classname = m.__class__.__name__
    # Look the parameters up defensively: a matching class name does not
    # guarantee that the module actually owns a weight or a bias.
    weight = getattr(m, 'weight', None)
    bias = getattr(m, 'bias', None)

    if 'Conv2d' in classname or 'Linear' in classname:
        if weight is not None:
            init.kaiming_normal_(weight.data, a=0, mode='fan_in')
            weight.data *= scale
        if bias is not None:
            bias.data.zero_()
    elif 'BatchNorm2d' in classname:
        if weight is not None:
            init.constant_(weight.data, 1.0)
        if bias is not None:
            init.constant_(bias.data, 0.0)

def weights_init_orthogonal(m):
    """Initialize a single module in place with orthogonal weights.

    Meant to be passed to ``nn.Module.apply``. The kind of module is decided
    by a substring match on its class name:

    * name contains ``'Conv'`` or ``'Linear'``: weight is set with
      ``orthogonal_(gain=1)``; bias = 0
    * name contains ``'BatchNorm2d'``: weight = 1, bias = 0

    Modules that match none of these are left untouched. Parameters that are
    missing or ``None`` (e.g. ``bias=False`` layers, ``BatchNorm2d`` with
    ``affine=False``, or a wrapper class whose name merely contains 'Conv')
    are skipped instead of raising ``AttributeError``.

    Args:
        m: The module to initialize.
    """
    classname = m.__class__.__name__
    # Look the parameters up defensively: a matching class name does not
    # guarantee that the module actually owns a weight or a bias.
    weight = getattr(m, 'weight', None)
    bias = getattr(m, 'bias', None)

    if 'Conv' in classname or 'Linear' in classname:
        if weight is not None:
            init.orthogonal_(weight.data, gain=1)
        if bias is not None:
            bias.data.zero_()
    elif 'BatchNorm2d' in classname:
        if weight is not None:
            init.constant_(weight.data, 1.0)
        if bias is not None:
            init.constant_(bias.data, 0.0)

def init_weights(net, init_type='kaiming', scale=1, std=0.02):
    """Apply one of the module-level initializers to every submodule of ``net``.

    Args:
        net: Object exposing ``apply(fn)``; the chosen initializer is passed
            to it.
        init_type: ``'normal'``, ``'kaiming'`` or ``'orthogonal'``.
        scale: Forwarded to the Kaiming initializer only.
        std: Forwarded to the normal initializer only.

    Raises:
        NotImplementedError: If ``init_type`` is not one of the three
            supported names.
    """
    logging.info('Initialization method [{:s}]'.format(init_type))

    # Pick the per-module callback first, then walk the network once.
    if init_type == 'normal':
        initializer = functools.partial(weights_init_normal, std=std)
    elif init_type == 'kaiming':
        initializer = functools.partial(weights_init_kaiming, scale=scale)
    elif init_type == 'orthogonal':
        initializer = weights_init_orthogonal
    else:
        message = 'initialization method [{:s}] not implemented'.format(init_type)
        raise NotImplementedError(message)

    net.apply(initializer)


####################
# define network
####################


# Generator
def define_G(opt):
    """Build the diffusion generator described by ``opt`` and move it to CUDA.

    The generator is a ``GaussianDiffusion`` wrapping three parts: a prior
    encoder chosen from ``prior_model``, an MLP (``imnet``), and a UNet.

    Keys read from ``opt``:

    * ``opt['model']['which_model_G']``: must be ``'sr3'``.
    * ``opt['model']['unet']``: ``in_channel``, ``out_channel``,
      ``norm_groups`` (optional), ``inner_channel``, ``channel_multiplier``,
      ``attn_res``, ``res_blocks``, ``dropout``.
    * ``opt['model']['diffusion']``: ``image_size``, ``channels``,
      ``conditional``.
    * ``opt['model']['beta_schedule']['train']``: passed as ``schedule_opt``.
    * ``opt['prior_type']``: a key of ``prior_model``; ``opt[<prior_type>]``
      holds the keyword arguments for that encoder.
    * ``opt['phase']``: when ``'train'``, weights are initialized with the
      orthogonal scheme.

    Side effect: if ``norm_groups`` is missing or ``None`` it is written back
    into ``opt['model']['unet']`` as 32.

    Args:
        opt: Nested configuration mapping (see above).

    Returns:
        The ``GaussianDiffusion`` network after ``.cuda()``.

    Raises:
        NotImplementedError: If ``which_model_G`` is not ``'sr3'``.
        KeyError: If ``opt['prior_type']`` is not a key of ``prior_model``
            (or a required config key is absent).
        AssertionError: If CUDA is not available.
    """
    model_opt = opt['model']

    # Only the 'sr3' generator is wired up. Fail with a clear message rather
    # than with an unbound-name error when the modules are first used below.
    which_model = model_opt['which_model_G']
    if which_model != 'sr3':
        raise NotImplementedError(
            'generator model [{!r}] not implemented'.format(which_model))
    from .sr3_modules import diffusion, unet, mlp

    unet_opt = model_opt['unet']
    diffusion_opt = model_opt['diffusion']

    # Default is persisted into the caller's config, not just used locally.
    if ('norm_groups' not in unet_opt) or unet_opt['norm_groups'] is None:
        unet_opt['norm_groups'] = 32

    # load unet ------------------------------------------------
    model = unet.UNet(
        in_channel=unet_opt['in_channel'],
        out_channel=unet_opt['out_channel'],
        norm_groups=unet_opt['norm_groups'],
        inner_channel=unet_opt['inner_channel'],
        channel_mults=unet_opt['channel_multiplier'],
        attn_res=unet_opt['attn_res'],
        res_blocks=unet_opt['res_blocks'],
        dropout=unet_opt['dropout'],
        image_size=diffusion_opt['image_size']
    )

    # load prior models -----------------------------------------
    prior_type = opt["prior_type"]
    if prior_type not in prior_model:
        raise KeyError(
            'unknown prior_type {!r}; expected one of {}'.format(
                prior_type, sorted(prior_model)))
    # The encoder's keyword arguments live under the key named by prior_type.
    encoder = prior_model[prior_type](**opt[prior_type])

    # NOTE(review): in_dim=64+2 presumably means 64 encoder feature channels
    # plus 2 extra inputs -- confirm against the encoder configuration.
    imnet = mlp.MLP(in_dim=64+2, out_dim=3, hidden_list=[256, 256, 256, 256])

    netG = diffusion.GaussianDiffusion(
        encoder,
        imnet,
        model,
        image_size=diffusion_opt['image_size'],
        channels=diffusion_opt['channels'],
        loss_type='l1',    # L1 or L2
        conditional=diffusion_opt['conditional'],
        schedule_opt=model_opt['beta_schedule']['train'])

    if opt['phase'] == 'train':
        init_weights(netG, init_type='orthogonal')

    # The network is returned as a plain CUDA module; no DistributedDataParallel
    # wrapping is applied here.
    assert torch.cuda.is_available(), 'define_G requires a CUDA device'
    return netG.cuda()
