import pytest

import torch
import torch.nn as nn

from nesim.losses.neighbourhood_cossim.loss import NeighbourhoodCosineSimilarityLoss
from nesim.utils.grid_size import find_rectangle_dimensions

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Candidate nn.Conv2d hyper-parameters. The tests below stack one
# pytest.mark.parametrize per key, so every combination of values is exercised.
conv_layer_possible_params = {
    # number of input channels of the conv layer
    "in_channels": [64, 128],
    # number of output channels of the conv layer
    "out_channels": [15, 512, 1024],
    # side length of the convolution kernel
    "kernel_size": [3, 5],
    # stride of the convolution
    "stride": [1, 2, 3],
}


@pytest.mark.parametrize("in_channels", conv_layer_possible_params["in_channels"])
@pytest.mark.parametrize("out_channels", conv_layer_possible_params["out_channels"])
@pytest.mark.parametrize("kernel_size", conv_layer_possible_params["kernel_size"])
@pytest.mark.parametrize("stride", conv_layer_possible_params["stride"])
def test_conv(in_channels, out_channels, kernel_size, stride):
    """
    For every combination of conv hyper-parameters, check that:
    1. get_loss() gives back a torch tensor
    2. that tensor is a scalar (0-D)
    3. the first two dims of the grid equal the (height, width) returned by
       find_rectangle_dimensions(area=out_channels)
    """
    layer = nn.Conv2d(
        in_channels=in_channels,
        out_channels=out_channels,
        kernel_size=kernel_size,
        stride=stride,
    )
    expected_dims = find_rectangle_dimensions(area=out_channels)

    criterion = NeighbourhoodCosineSimilarityLoss(layer=layer, device=device)
    loss = criterion.get_loss()

    # 1: the loss is a tensor
    assert isinstance(loss, torch.Tensor), "Expected loss to be a torch tensor"
    # 2: the loss has no dimensions, i.e. it is a scalar
    assert loss.ndim == 0
    # 3: leading grid dims match the expected rectangle
    grid_height_width = criterion.grid_container.grid.shape[:2]
    assert grid_height_width == (expected_dims.height, expected_dims.width)


@pytest.mark.parametrize("in_channels", conv_layer_possible_params["in_channels"])
@pytest.mark.parametrize("out_channels", conv_layer_possible_params["out_channels"])
@pytest.mark.parametrize("kernel_size", conv_layer_possible_params["kernel_size"])
@pytest.mark.parametrize("stride", conv_layer_possible_params["stride"])
def test_fast_vs_original(in_channels, out_channels, kernel_size, stride):
    """
    Test the following:
    1. on a freshly initialised conv layer, get_loss_fast() should return
       (within atol=0.1) the same value as get_loss_original()
    """
    conv_layer = nn.Conv2d(
        in_channels, out_channels, kernel_size=kernel_size, stride=stride
    )

    # NOTE(review): padding/kernel_size/stride below are arguments of the loss
    # itself and are fixed regardless of the conv layer's own kernel_size and
    # stride -- presumably they describe the neighbourhood window; confirm
    # against NeighbourhoodCosineSimilarityLoss.
    loss_calculator = NeighbourhoodCosineSimilarityLoss(
        layer=conv_layer, device=device, padding=0, kernel_size=3, stride=1
    )
    loss_fast = loss_calculator.get_loss_fast()
    loss_original = loss_calculator.get_loss_original()

    # torch.allclose already returns a bool, so assert on it directly and
    # report both values when they disagree.
    assert torch.allclose(loss_fast, loss_original, atol=0.1), (
        f"get_loss_fast() returned {loss_fast}, "
        f"but get_loss_original() returned {loss_original}"
    )


@pytest.mark.parametrize("in_channels", conv_layer_possible_params["in_channels"])
@pytest.mark.parametrize("out_channels", [15, 64])
@pytest.mark.parametrize("kernel_size", conv_layer_possible_params["kernel_size"])
@pytest.mark.parametrize("stride", [1])
@pytest.mark.parametrize("n_training_steps", [1, 2, 10])
def test_training_loop(
    in_channels, out_channels, kernel_size, stride, n_training_steps
):
    """
    Test the following:
    1. at every step of a small training loop, get_loss_fast() should return
       (within atol=0.1) the same value as get_loss_original()
    2. after the training loop, both losses should still agree

    Only the fast loss is back-propagated; the original loss is evaluated
    under torch.no_grad() purely as a reference value.
    NOTE: whether the loss actually decreases is not asserted here.
    """
    conv_layer = nn.Conv2d(
        in_channels, out_channels, kernel_size=kernel_size, stride=stride
    )

    loss_calculator = NeighbourhoodCosineSimilarityLoss(
        layer=conv_layer, device=device, padding=0, kernel_size=3, stride=1
    )
    optimizer = torch.optim.Adam(conv_layer.parameters(), lr=1e-2)

    for train_step_idx in range(n_training_steps):
        optimizer.zero_grad()
        loss = loss_calculator.get_loss_fast()
        # Reference value only: no gradient is needed for the original loss.
        with torch.no_grad():
            loss_original = loss_calculator.get_loss_original()
        loss.backward()
        optimizer.step()
        # Both values were computed before optimizer.step(), i.e. on the same
        # weights, so they are directly comparable.
        assert torch.allclose(loss, loss_original, atol=0.1), (
            f"step {train_step_idx}: get_loss_fast() returned {loss}, "
            f"but get_loss_original() returned {loss_original}"
        )

    ## post training
    loss = loss_calculator.get_loss_fast()
    loss_original = loss_calculator.get_loss_original()
    assert torch.allclose(loss, loss_original, atol=0.1), (
        f"after {n_training_steps} training step(s): get_loss_fast() returned "
        f"{loss}, but get_loss_original() returned {loss_original}"
    )
