import numpy as np
import torch
import torch.nn.functional as F
from equivariant_diffusion.utils import assert_mean_zero_with_mask, remove_mean_with_mask,\
    assert_correctly_masked
from qm9.rdkit_functions import check_stability

def rotate_chain(z, n_steps=30):
    """Build a sequence of frames in which a single sample is rotated step by step.

    The first three features of every node are treated as coordinates. Each
    frame applies the same small rotation ``Q = Qz @ Qx @ Qy`` to the
    coordinates of the previous frame; all remaining features are copied
    unchanged into every frame.

    Args:
        z: Floating-point tensor of shape ``(1, n_nodes, 3 + n_features)``.
        n_steps: Number of rotated frames appended after the original one.
            Each elementary rotation (about z, x and y) uses the angle
            ``0.6 * pi / n_steps``.

    Returns:
        Tensor of shape ``(n_steps + 1, n_nodes, 3 + n_features)`` with the
        dtype and device of ``z``; frame 0 is ``z`` itself.

    Raises:
        AssertionError: If the leading dimension of ``z`` is not 1.
        ValueError: If ``n_steps`` is smaller than 1.
    """
    assert z.size(0) == 1, "rotate_chain expects exactly one sample (batch size 1)"
    if n_steps < 1:
        raise ValueError(f"n_steps must be a positive integer, got {n_steps}")

    features = z[:, :, 3:]

    theta = 0.6 * np.pi / n_steps
    cos_t, sin_t = float(np.cos(theta)), float(np.sin(theta))

    # Build the matrices in float64 only for float64 input; every other input
    # keeps the float32 construction so existing float32 results are unchanged.
    build_dtype = torch.float64 if z.dtype == torch.float64 else torch.float32
    Qz = torch.tensor(
        [[cos_t, -sin_t, 0.],
         [sin_t, cos_t, 0.],
         [0., 0., 1.]],
        dtype=build_dtype,
    )
    Qx = torch.tensor(
        [[1., 0., 0.],
         [0., cos_t, -sin_t],
         [0., sin_t, cos_t]],
        dtype=build_dtype,
    )
    Qy = torch.tensor(
        [[cos_t, 0., sin_t],
         [0., 1., 0.],
         [-sin_t, 0., cos_t]],
        dtype=build_dtype,
    )

    # torch.matmul does not promote dtypes, so Q has to match z exactly.
    Q = torch.mm(torch.mm(Qz, Qx), Qy).to(device=z.device, dtype=z.dtype)
    Q_t = Q.T  # coordinates are row vectors: x @ Q.T applies Q to each of them

    frames = [z]
    coords = z[:, :, :3]
    for _ in range(n_steps):
        coords = torch.matmul(coords.reshape(-1, 3), Q_t).view(1, -1, 3)
        frames.append(torch.cat([coords, features], dim=2))

    return torch.cat(frames, dim=0)


def reverse_tensor(x):
    """Return a copy of ``x`` with the entries along its first dimension reversed.

    ``torch.flip`` performs the reversal directly on the tensor's own device,
    so no separate index tensor has to be built for the lookup.
    """
    return torch.flip(x, dims=[0])


def sample_chain(args, device, flow, n_tries, dataset_info, prop_dist=None, rep_context=None, keep_frames=100):
    """Sample a single diffusion chain, retrying until its final frame is stable.

    Only unconditional sampling is supported: the function asserts that no
    context features, property distribution or representation context are
    supplied, so ``flow.sample_chain`` is always called with ``context=None``.

    Args:
        args: Run configuration; ``context_node_nf``, ``dataset`` and
            ``probabilistic_model`` are read.
        device: Device the node and edge masks are moved to.
        flow: Model exposing ``sample_chain(n_samples, n_nodes, node_mask,
            edge_mask, context, keep_frames=...)``.
        n_tries: Maximum number of chains to draw.
        dataset_info: Passed to ``check_stability``; its ``'atom_decoder'``
            entry determines the number of one-hot classes.
        prop_dist: Must be ``None``.
        rep_context: Must be ``None``.
        keep_frames: Forwarded to ``flow.sample_chain``.

    Returns:
        ``(one_hot, charges, x)`` built from every frame of the last chain
        drawn: one-hot atom types, rounded integer charges (last channel) and
        coordinates (first three channels). ``(None, None, None)`` when
        ``n_tries`` is smaller than 1.

    Raises:
        AssertionError: If any conditioning input is provided.
        ValueError: If ``args.dataset`` or ``args.probabilistic_model`` is
            not supported.
    """
    assert args.context_node_nf == 0 and prop_dist is None and rep_context is None, "Only support unconditional pcdm and unconditional rdm now"

    n_samples = 1
    # The chain is drawn with a fixed molecule size per dataset.
    if args.dataset in ('qm9', 'qm9_second_half', 'qm9_first_half'):
        n_nodes = 19
    elif args.dataset == 'geom':
        n_nodes = 44
    else:
        raise ValueError(f"Unsupported dataset for chain sampling: {args.dataset!r}")

    if args.probabilistic_model != 'diffusion':
        raise ValueError(args.probabilistic_model)

    # The assertion above rules out conditional sampling, so there is never
    # a context tensor to build.
    context = None

    node_mask = torch.ones(n_samples, n_nodes, 1).to(device)

    # Fully connected graph without self-loops, flattened to one row per edge.
    edge_mask = (1 - torch.eye(n_nodes)).unsqueeze(0)
    edge_mask = edge_mask.repeat(n_samples, 1, 1).view(-1, 1).to(device)

    chain = None
    for attempt in range(n_tries):
        chain = flow.sample_chain(n_samples, n_nodes, node_mask, edge_mask, context, keep_frames=keep_frames)
        # After reversing, the last frame is the one treated as the final sample.
        chain = reverse_tensor(chain)

        final_x = chain[-1, :, 0:3].cpu().detach().numpy()
        final_atom_type = torch.argmax(chain[-1, :, 3:-1], dim=1).cpu().detach().numpy()
        mol_stable = check_stability(final_x, final_atom_type, dataset_info)[0]

        if mol_stable:
            print('Found stable molecule to visualize :)')
            break
        if attempt == n_tries - 1:
            print('Did not find stable molecule, showing last sample.')

    if chain is None:
        # No attempt was made (n_tries < 1).
        return None, None, None

    # Prepare the entire chain of the last attempt.
    x = chain[:, :, 0:3]
    one_hot = F.one_hot(torch.argmax(chain[:, :, 3:-1], dim=2), num_classes=len(dataset_info['atom_decoder']))
    charges = torch.round(chain[:, :, -1:]).long()

    return one_hot, charges, x



def sample(args, device, generative_model, dataset_info,
           prop_dist=None, nodesxsample=torch.tensor([10]), rep_context=None,
           fix_noise=False, context=None, fixed_rep=None):
    """Draw a batch of molecules from ``generative_model``.

    The generator is only ever used without property conditioning: the
    function asserts that ``prop_dist`` and ``context`` are ``None`` and that
    ``args.context_node_nf`` is 0, and always passes ``context=None`` to the
    model. ``rep_context`` and ``fixed_rep`` are forwarded untouched.

    Args:
        args: Run configuration; ``context_node_nf``, ``probabilistic_model``
            and ``include_charges`` are read.
        device: Device the masks are moved to before sampling.
        generative_model: Model exposing ``sample(batch_size, max_n_nodes,
            node_mask, edge_mask, context=..., fix_noise=..., fixed_rep=...,
            rep_context=...)`` and returning ``(x, h)``.
        dataset_info: Its ``'max_n_nodes'`` entry fixes the padded size.
        prop_dist: Must be ``None``.
        nodesxsample: 1-D integer tensor with the number of nodes of each
            sample; values are expected to lie in ``[0, max_n_nodes]``.
        rep_context: Forwarded to the model.
        fix_noise: Forwarded to the model.
        context: Must be ``None``.
        fixed_rep: Forwarded to the model.

    Returns:
        ``(one_hot, charges, x, node_mask)`` where ``one_hot`` is
        ``h['categorical']``, ``charges`` is ``h['integer']`` and
        ``node_mask`` has shape ``(batch_size, max_n_nodes, 1)``.

    Raises:
        AssertionError: If conditioning inputs are given, if a sample asks
            for more than ``max_n_nodes`` nodes, or if a masking check fails.
        ValueError: If ``args.probabilistic_model`` is not ``'diffusion'``.
    """
    assert prop_dist is None and args.context_node_nf == 0 and context is None, "The molecule generator is always unconditionally used (it only conditions on representations)."

    max_n_nodes = dataset_info['max_n_nodes']  # this is the maximum node_size in QM9

    assert int(torch.max(nodesxsample)) <= max_n_nodes
    batch_size = len(nodesxsample)

    if args.probabilistic_model != 'diffusion':
        raise ValueError(args.probabilistic_model)

    # Row i has ones in its first nodesxsample[i] positions and zeros elsewhere.
    counts = torch.as_tensor(nodesxsample).cpu()
    positions = torch.arange(max_n_nodes).unsqueeze(0)
    node_mask = (positions < counts.unsqueeze(1)).to(torch.get_default_dtype())

    # An edge exists between two distinct nodes that are both present.
    edge_mask = node_mask.unsqueeze(1) * node_mask.unsqueeze(2)
    off_diagonal = ~torch.eye(max_n_nodes, dtype=torch.bool).unsqueeze(0)
    edge_mask = edge_mask * off_diagonal
    edge_mask = edge_mask.view(batch_size * max_n_nodes * max_n_nodes, 1).to(device)
    node_mask = node_mask.unsqueeze(2).to(device)

    # The assertion at the top guarantees there is no property context here.
    x, h = generative_model.sample(
        batch_size,
        max_n_nodes,
        node_mask,
        edge_mask,
        context=None,
        fix_noise=fix_noise,
        fixed_rep=fixed_rep,
        rep_context=rep_context,
    )

    assert_correctly_masked(x, node_mask)
    assert_mean_zero_with_mask(x, node_mask)

    one_hot = h['categorical']
    charges = h['integer']

    assert_correctly_masked(one_hot.float(), node_mask)
    if args.include_charges:
        assert_correctly_masked(charges.float(), node_mask)

    return one_hot, charges, x, node_mask


def sample_sweep_conditional(args, device, generative_model, dataset_info, prop_dist, n_nodes=19, n_frames=100, return_property_values=False, start_value=None, end_value=None):
    """Sample ``n_frames`` molecules while one property value is swept linearly.

    The sweep runs from ``start_value`` to ``end_value`` when both are given,
    otherwise over the pair stored in
    ``prop_dist.distributions[key][n_nodes]['params']``. The end points are
    normalised with the ``'mean'`` and ``'mad'`` entries of
    ``prop_dist.normalizer[key]`` and the resulting column is handed to
    ``sample`` as ``rep_context`` with ``fix_noise=True``.

    Args:
        args, device, generative_model, dataset_info: Forwarded to ``sample``.
        prop_dist: Object with ``distributions`` and ``normalizer`` mappings;
            exactly one property is supported.
        n_nodes: Number of nodes used for every frame.
        n_frames: Number of frames, i.e. points along the sweep.
        return_property_values: When true, the de-normalised property value
            of each frame is appended to the result.
        start_value, end_value: Optional explicit sweep range.

    Returns:
        ``(one_hot, charges, x, node_mask)``, followed by ``property_values``
        when ``return_property_values`` is true.
    """
    frame_sizes = torch.tensor([n_nodes] * n_frames)

    context_columns = []
    if return_property_values:
        property_values = []
    assert len(prop_dist.distributions) == 1, "Only support 1 (addtional) condition now."

    has_explicit_range = start_value is not None and end_value is not None
    for name in prop_dist.distributions:
        if has_explicit_range:
            lower, upper = start_value, end_value
        else:
            lower, upper = prop_dist.distributions[name][n_nodes]['params']
        print(f"Conditioning the property value: {lower} - {upper}")

        stats = prop_dist.normalizer[name]
        mean, mad = stats['mean'], stats['mad']

        # Work in normalised units: (value - mean) / mad.
        lower_norm = (lower - mean) / mad
        upper_norm = (upper - mean) / mad
        sweep = torch.tensor(np.linspace(lower_norm, upper_norm, n_frames)).unsqueeze(1)
        context_columns.append(sweep)

        # Map each sweep point back to the original property scale.
        property_values = [point * mad + mean for point in sweep]

    sweep_context = torch.cat(context_columns, dim=1).float().to(device)

    one_hot, charges, x, node_mask = sample(
        args,
        device,
        generative_model,
        dataset_info,
        nodesxsample=frame_sizes,
        rep_context=sweep_context,
        fix_noise=True,
    )

    if not return_property_values:
        return one_hot, charges, x, node_mask
    return one_hot, charges, x, node_mask, property_values