import torch
import torch.nn as nn
from models.utils import StraightThrough
import torch.nn.init as init


def _fold_bn(conv_module, bn_module, avg=False):
    """Compute the weight and bias that merge a batch-norm into the layer before it.

    Neither module is modified; the folded tensors are only returned. The
    batch-norm's running statistics are used, i.e. this reproduces its
    eval-mode behaviour.

    Args:
        conv_module: Layer exposing ``weight`` and an optional ``bias``. The
            first dimension of ``weight`` must index the output
            channels/features (true for ``nn.Conv2d`` and ``nn.Linear``).
        bn_module: Batch-norm layer exposing ``running_mean``, ``running_var``,
            ``eps``, ``affine`` and, when affine, ``weight`` and ``bias``.
        avg: Unused; kept so existing callers keep working.

    Returns:
        Tuple ``(weight, bias)`` of the folded parameters.
    """
    w = conv_module.weight.data
    y_mean = bn_module.running_mean
    y_var = bn_module.running_var
    # eps keeps the denominator away from zero for (near-)constant channels.
    safe_std = torch.sqrt(y_var + bn_module.eps)
    # One scale per output channel/feature, broadcast over all remaining
    # weight dimensions. Deriving the shape from the weight rank (instead of
    # hard-coding a 4-D conv view) also covers 2-D nn.Linear weights.
    w_view = (-1,) + (1,) * (w.dim() - 1)
    if bn_module.affine:
        weight = w * (bn_module.weight / safe_std).view(w_view)
        # Constant offset contributed by the batch-norm alone.
        beta = bn_module.bias - bn_module.weight * y_mean / safe_std
        if conv_module.bias is not None:
            bias = bn_module.weight * conv_module.bias / safe_std + beta
        else:
            bias = beta
    else:
        # No learnable scale/shift: equivalent to gamma = 1, beta = 0.
        weight = w / safe_std.view(w_view)
        beta = -y_mean / safe_std
        if conv_module.bias is not None:
            bias = conv_module.bias / safe_std + beta
        else:
            bias = beta
    return weight, bias


def _back_bn(conv_module, bn_module, avg=False):
    """Recover the pre-folding bias from a bias that had a batch-norm folded in.

    This inverts the bias formula used by ``_fold_bn``:
    ``folded = gamma * bias / std + beta`` (affine) or
    ``folded = bias / std + beta`` (non-affine). Only the bias is
    reconstructed; the weight is not un-scaled here. Neither module is
    modified.

    Args:
        conv_module: Layer whose ``bias`` currently holds the folded bias.
            ``bias`` must not be ``None``.
        bn_module: Batch-norm layer exposing ``running_mean``, ``running_var``,
            ``eps``, ``affine`` and, when affine, ``weight`` and ``bias``.
        avg: Unused; kept so existing callers keep working.

    Returns:
        Tensor with the un-folded bias.
    """
    y_mean = bn_module.running_mean
    y_var = bn_module.running_var
    # eps guards against numerical problems when the variance is tiny.
    safe_std = torch.sqrt(y_var + bn_module.eps)
    if bn_module.affine:
        beta = bn_module.bias - bn_module.weight * y_mean / safe_std
        bias = (conv_module.bias - beta) * safe_std / bn_module.weight
    else:
        # Affine-free batch-norm has no weight/bias tensors (gamma = 1,
        # beta = 0), mirroring the non-affine branch of _fold_bn.
        beta = -y_mean / safe_std
        bias = (conv_module.bias - beta) * safe_std
    return bias


def fold_bn_into_conv(conv_module, bn_module, avg=False):
    """Fold a batch-norm layer into the layer that precedes it, in place.

    The folded weight/bias from ``_fold_bn`` are written into ``conv_module``
    (a bias parameter is created when the layer had none). Afterwards the
    batch-norm's running statistics are overwritten with values derived from
    its own affine parameters.

    Args:
        conv_module: Layer to receive the folded parameters.
        bn_module: Batch-norm layer being folded; its running stats are
            replaced.
        avg: Forwarded to ``_fold_bn``.
    """
    w, b = _fold_bn(conv_module, bn_module, avg)
    if conv_module.bias is None:
        conv_module.bias = nn.Parameter(b)
    else:
        conv_module.bias.data = b
    conv_module.weight.data = w
    # Set the BN running stats from its affine parameters: mean = beta,
    # var = gamma ** 2.
    # NOTE(review): with these stats the eval-mode BN output is
    # gamma * (x - beta) / sqrt(gamma**2 + eps) + beta, which is close to the
    # identity only for positive gamma -- confirm this is intended.
    if bn_module.affine:
        # clone() so the buffer owns its storage; assigning bias.data directly
        # would make later in-place updates of either tensor change the other.
        bn_module.running_mean = bn_module.bias.data.clone()
        bn_module.running_var = bn_module.weight.data ** 2
    else:
        # Affine-free BN has no weight/bias; use the gamma = 1, beta = 0
        # equivalents instead of dereferencing None.
        bn_module.running_mean = torch.zeros_like(bn_module.running_mean)
        bn_module.running_var = torch.ones_like(bn_module.running_var)


def back_bn_to_conv(conv_module, bn_module, avg=False):
    """Replace the bias of ``conv_module`` with its un-folded value, in place.

    The new bias comes from ``_back_bn``. The layer must already have a bias;
    the weight and the batch-norm running statistics are left untouched.

    Args:
        conv_module: Layer whose ``bias.data`` is overwritten.
        bn_module: Batch-norm layer passed through to ``_back_bn``.
        avg: Forwarded to ``_back_bn``.
    """
    conv_module.bias.data = _back_bn(conv_module, bn_module, avg)


def reset_bn(module: nn.BatchNorm2d):
    """Reset a batch-norm layer's statistics and affine parameters in place.

    Running mean becomes 0 and running variance becomes ``1 - eps`` (so that
    ``var + eps`` equals 1); weight becomes 1 and bias becomes 0. Each group
    is only touched when the layer actually has it. The number of tracked
    batches is deliberately left as it is.

    Args:
        module: Batch-norm layer to reset.
    """
    if module.track_running_stats:
        module.running_mean.zero_()
        unit_variance = 1 - module.eps
        module.running_var.fill_(unit_variance)
        # num_batches_tracked is intentionally not reset here.
    if module.affine:
        init.ones_(module.weight)
        init.zeros_(module.bias)


def is_bn(m):
    """Return True when ``m`` is an ``nn.BatchNorm2d`` or ``nn.BatchNorm1d``."""
    batch_norm_types = (nn.BatchNorm2d, nn.BatchNorm1d)
    return isinstance(m, batch_norm_types)


def is_absorbing(m):
    """Return True when ``m`` is an ``nn.Conv2d`` or ``nn.Linear`` layer.

    These are the layer types the search helpers in this file treat as able
    to take a following batch-norm.
    """
    absorbing_types = (nn.Conv2d, nn.Linear)
    return isinstance(m, absorbing_types)


def search_fold_and_remove_bn(model):
    """Fold every batch-norm that follows an absorbing layer and remove it.

    Walks the direct children of ``model`` in order, recursing into children
    that are neither an absorbing layer nor a foldable batch-norm. A folded
    batch-norm is replaced on ``model`` by a ``StraightThrough`` module. The
    model is switched to eval mode first.

    Args:
        model: Module whose children are searched and modified in place.

    Returns:
        The candidate absorbing layer left over after the last child: the
        most recent absorbing child, or the value returned by recursing into
        the last other child (``None`` when there is none). The recursion
        uses it so a batch-norm can fold into a layer found inside the
        preceding sibling.
    """
    model.eval()
    candidate = None
    for child_name, child in model.named_children():
        if is_bn(child) and is_absorbing(candidate):
            fold_bn_into_conv(candidate, child)
            # The batch-norm now lives inside `candidate`; swap in a
            # StraightThrough module in its place.
            setattr(model, child_name, StraightThrough())
            continue
        if is_absorbing(child):
            candidate = child
        else:
            candidate = search_fold_and_remove_bn(child)
    return candidate


def search_back_bn(model):
    """Un-fold the bias of every absorbing layer that is followed by a batch-norm.

    Walks the direct children of ``model`` in order, recursing into children
    that are neither an absorbing layer nor a batch-norm following one. For
    each matching pair ``back_bn_to_conv`` is applied; the batch-norm module
    itself stays in the model. The model is switched to eval mode first.

    Args:
        model: Module whose children are searched and modified in place.

    Returns:
        The candidate absorbing layer left over after the last child: the
        most recent absorbing child, or the value returned by recursing into
        the last other child (``None`` when there is none).
    """
    model.eval()
    candidate = None
    for _name, child in model.named_children():
        if is_bn(child) and is_absorbing(candidate):
            back_bn_to_conv(candidate, child)
            continue
        if is_absorbing(child):
            candidate = child
        else:
            candidate = search_back_bn(child)
    return candidate


def search_fold_and_reset_bn(model):
    """Fold each batch-norm into the absorbing layer directly before it.

    Walks the direct children of ``model`` in order. A batch-norm whose
    immediately preceding sibling is an absorbing layer is folded with
    ``fold_bn_into_conv``; every other child is searched recursively. The
    batch-norm module stays in the model and ``reset_bn`` is not called on
    it. The model is switched to eval mode first.

    Args:
        model: Module whose children are searched and modified in place.
    """
    model.eval()
    previous = None
    for _name, child in model.named_children():
        foldable = is_bn(child) and is_absorbing(previous)
        if foldable:
            fold_bn_into_conv(previous, child)
        else:
            search_fold_and_reset_bn(child)
        # Only the immediately preceding sibling is ever a fold target.
        previous = child
