import torch.nn as nn
import torch.nn.functional as F
import torch
import ipdb
import gc
st = ipdb.set_trace


def square_distance(src, dst):
    """
    Pairwise squared Euclidean distance between two batched point sets.

    Relies on the expansion |s - d|^2 = |s|^2 + |d|^2 - 2 * <s, d>, so the
    cross term is a single batched matrix product. No square root is taken.
    Args:
        src: source points, [B, N, C]
        dst: target points, [B, M, C]
    Returns:
        dist: squared distance between every source/target pair, [B, N, M]
    """
    batch, n_src, _ = src.shape
    _, n_dst, _ = dst.shape

    # <s, d> for every source/target pair: [B, N, C] x [B, C, M] -> [B, N, M]
    cross_term = torch.matmul(src, dst.transpose(1, 2))
    src_sq_norm = (src ** 2).sum(dim=-1).view(batch, n_src, 1)
    dst_sq_norm = (dst ** 2).sum(dim=-1).view(batch, 1, n_dst)

    # The norms broadcast along the M and N axes respectively.
    return -2 * cross_term + src_sq_norm + dst_sq_norm


def query_ball_point(radius, nsample, xyz, new_xyz):
    """
    Ball query with a nearest-neighbour fallback.

    For each query point, returns up to nsample indices of points in xyz
    lying within radius, in ascending index order (not sorted by distance).
    Rows with fewer than nsample hits are padded with their first hit; rows
    with no hit at all are filled with the nsample nearest neighbours.
    Input:
        radius: local region radius
        nsample: max sample number in local region
        xyz: all points, [B, N, 3]
        new_xyz: query points, [B, S, 3]
    Return:
        group_idx: grouped points index, [B, S, nsample]
    """
    device = xyz.device
    batch, n_points, _ = xyz.shape
    _, n_queries, _ = new_xyz.shape

    sq_dists = square_distance(new_xyz, xyz)

    # KNN fallback: indices of the nsample closest points for each query.
    nearest_idx = torch.sort(sq_dists, dim=-1).indices[:, :, :nsample]

    # Start from every point index, then mark out-of-ball points with the
    # sentinel value N so that sorting pushes them to the end of each row.
    candidates = torch.arange(n_points, dtype=torch.long).to(device)
    candidates = candidates.view(1, 1, n_points).repeat([batch, n_queries, 1])
    candidates[sq_dists > radius ** 2] = n_points
    ball_idx = torch.sort(candidates, dim=-1).values[:, :, :nsample]

    # Pad short rows by repeating the first in-ball index.
    first_hit = ball_idx[:, :, 0].view(batch, n_queries, 1)
    first_hit = first_hit.repeat([1, 1, nsample])
    needs_padding = ball_idx == n_points
    ball_idx[needs_padding] = first_hit[needs_padding]

    # Rows whose first entry was itself the sentinel had no in-ball point;
    # fall back to the nearest neighbours for those.
    still_empty = ball_idx == n_points
    ball_idx[still_empty] = nearest_idx[still_empty]

    # Sanity check: no sentinel may survive both fallbacks.
    leftover = ball_idx == n_points
    assert leftover.sum() == 0

    return ball_idx


def sample_and_group(radius, nsample, xyz, sampling_points):
    """
    Find, for every sampling point, the indices of its neighbours in xyz.

    Thin wrapper around query_ball_point that first checks that both point
    sets share the same batch size and coordinate dimension.
    Input:
        radius: neighbourhood radius
        nsample: max number of neighbours per sampling point
        xyz: input points position data, [B, N, C]
        sampling_points: query points, [B, S, C]
    Return:
        idx: neighbour indices into xyz, [B, S, nsample]
    Raises:
        AssertionError: if batch size or coordinate dimension differ
    """
    B, _, C = xyz.shape
    B2, _, C2 = sampling_points.shape
    # Both conditions must be part of the asserted expression; written as
    # `assert B == B2, C == C2` the second comparison is only the failure
    # message and is never checked.
    assert B == B2 and C == C2, (
        f"xyz {tuple(xyz.shape)} and sampling_points "
        f"{tuple(sampling_points.shape)} must share batch size and "
        f"coordinate dimension"
    )
    return query_ball_point(radius, nsample, xyz, sampling_points)
    
    
def index_points(points, idx):
    """
    Gather per-batch rows of points selected by idx.

    Args:
        points: input points data, [B, N, C]
        idx: sample index data, [B, S, nsamples]
    Returns:
        new_points: indexed points data, [B, S, nsamples, C]
    """
    n_batches = points.shape[0]
    idx_shape = list(idx.shape)

    # Column of batch ids shaped [B, 1, ..., 1], tiled to the shape of idx
    # so that entry (b, ...) of idx is looked up in batch b.
    column_shape = [idx_shape[0]] + [1] * (len(idx_shape) - 1)
    tiling = [1] + idx_shape[1:]
    batch_ids = torch.arange(n_batches, dtype=torch.long).to(points.device)
    batch_indices = batch_ids.view(column_shape).repeat(tiling)

    return points[batch_indices, idx, :]


def radius_gaussian(sq_r, sig, eps=1e-9):
    """
    Evaluate an unnormalised Gaussian of distance, exp(-sq_r / (2 * sig^2)).
    :param sq_r: input squared radiuses [dn, ..., d1, d0]
    :param sig: extents of gaussians [d1, d0] or [d0] or float
    :param eps: small constant added to the denominator to avoid dividing by zero
    :return: gaussian of sq_r [dn, ..., d1, d0]
    """
    denominator = 2 * sig**2 + eps
    exponent = -sq_r / denominator
    return torch.exp(exponent)



def pc_feature_interpolation(
    xyz,
    points,
    sampling_locations,
    radius=0.8,
    nsample=16,
    use_losses=False,
):
    """
    Interpolate point features at arbitrary query locations.

    For every query location the neighbouring input points are gathered with
    sample_and_group, and their features are summed with Gaussian weights of
    the squared distance (sigma = 0.3 * radius). The weights are not
    normalised.

    Input:
        xyz: input points position data, [B, C, N]
        points: input points data, [B, D, N]
        sampling_locations: query points, [B, N', 3]
        radius: neighbourhood radius used for the search and the Gaussian
        nsample: max number of neighbours per query point
        use_losses: when True, also compute the fitting loss
    Return:
        weighted_features: interpolated features, [B, N', D]
        fit_loss: mean over queries of the squared distance to the closest
            gathered neighbour, divided by radius ** 2 (a tensor); the float
            0.0 when use_losses is False
    Raises:
        ValueError: if points is None
    """
    # Features are indexed unconditionally below, so a missing feature tensor
    # can never work; fail with a clear message instead of an AttributeError
    # raised deep inside index_points.
    if points is None:
        raise ValueError(
            "pc_feature_interpolation requires `points` (features) to "
            "interpolate; got None"
        )

    xyz = xyz.permute(0, 2, 1)  # B, N, C
    points = points.permute(0, 2, 1)  # B, N, D

    sampled_neighbours_idx = sample_and_group(
        radius,
        nsample,
        xyz,
        sampling_locations,
    )  # [B, S, nsample]

    # B, S, nsample, D
    sampled_features = index_points(points, sampled_neighbours_idx)

    # B, S, nsample, 3
    sampled_points = index_points(xyz, sampled_neighbours_idx)

    # B, S, nsample, 3
    differences = sampling_locations.unsqueeze(2) - sampled_points

    # B, S, nsample
    sq_distances = torch.sum(differences ** 2, dim=3)

    sigma = radius * 0.3  # taken from kpconv

    # B, S, nsample
    all_weights = radius_gaussian(sq_distances, sigma)

    # [B, S, 1, nsample] x [B, S, nsample, D] -> [B, S, 1, D] -> [B, S, D]
    weighted_features = torch.matmul(
        all_weights.unsqueeze(-2), sampled_features).squeeze(2)

    if use_losses:
        # Fitting loss, computed here to save memory: distance to the closest
        # input point, normalised by the radius so that it is independent of
        # the layer it is computed in.
        nn_dist = sq_distances.min(dim=-1)[0] / radius ** 2
        fit_loss = F.l1_loss(nn_dist, torch.zeros_like(nn_dist))
    else:
        fit_loss = 0.0

    return weighted_features, fit_loss


def get_repulsion_loss(sampling_locations, radius):
    """
    Penalise sampling locations that lie too close to one another.

    Each location is compared against all the others (which are detached, so
    gradients only flow through the location under consideration). Pairs
    closer than the repulsion extent contribute the squared shortfall; the
    per-location losses are averaged over the batch and over the S locations.

    Input:
        sampling_locations: sampling positions, [B, S, 3]
        radius: neighbourhood radius used to normalise the positions
    Return:
        repulsive_loss: 0-dim tensor (zero when S == 0)
    Raises:
        AssertionError: if the resulting loss is NaN
    """
    _, S, _ = sampling_locations.shape

    # normalized sampling positions
    sampling_locations = sampling_locations / radius

    # its set to 2 * radius so that influence area of each point doesn't overlap
    # NOTE(review): positions were just divided by radius, yet the extent is
    # still expressed as 2 * radius -- confirm which units are intended.
    repulsion_extent = 2 * radius

    # Start from a tensor rather than a Python float so that the NaN check
    # and the return type are also valid when there is nothing to loop over.
    repulsive_loss = sampling_locations.new_zeros(())
    for i in range(S):
        # Every location except the i-th one, cut off from the graph.
        other_points = torch.cat(
            [sampling_locations[:, :i, :], sampling_locations[:, i + 1:, :]],
            dim=1,
        ).detach()
        distances = torch.sqrt(
            torch.sum(
                (other_points - sampling_locations[:, i:i + 1, :]) ** 2,
                dim=2,
            )
        )
        # Only pairs closer than the extent (negative difference) contribute.
        rep_loss = torch.sum(
            torch.clamp_max(distances - repulsion_extent, max=0.0) ** 2, dim=1)
        repulsive_loss = repulsive_loss + F.l1_loss(
            rep_loss, torch.zeros_like(rep_loss)) / S

    assert torch.isnan(repulsive_loss).sum() == 0
    return repulsive_loss
