import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable


def positional_encoding(sequence_length, embedding_dim):
    '''
    This function creates a sinusoidal positional encoding array that may be
    added to an embedded sequence to encode the position of each element.

    For position p and pair index i the entries are

        encoding[p, 2*i]     = sin(p / 10000**(2*i / embedding_dim))
        encoding[p, 2*i + 1] = cos(p / 10000**(2*i / embedding_dim))

    Arguments
    ---------
        sequence_length: integer
            this is the maximum length of the sequences.

        embedding_dim: integer
            this is the size of the embedding dimension. Odd sizes are
            supported: the last column then holds a sine with no matching
            cosine.

    Returns
    --------
        pos_encoding: array
            float array of shape (sequence_length, embedding_dim) holding the
            positional encoding itself (it is not applied to any data here).

    '''

    m = sequence_length
    d = embedding_dim

    # Column vector of positions so that it broadcasts against the wavelengths.
    positions = np.arange(m, dtype=np.float64)[:, np.newaxis]

    # One wavelength per (sine, cosine) pair; ceil(d / 2) pairs covers odd d.
    num_pairs = (d + 1) // 2
    wavelengths = 10000.0 ** (2 * np.arange(num_pairs) / d)

    # The position is DIVIDED by the wavelength term. Multiplying instead makes
    # the angles grow up to ~pos * 10000, which aliases into near-noise.
    angles = positions / wavelengths

    pos_encoding = np.zeros((m, d))
    pos_encoding[:, 0::2] = np.sin(angles)
    # Only floor(d / 2) odd columns exist, so drop the unpaired angle if d is odd.
    pos_encoding[:, 1::2] = np.cos(angles[:, :d // 2])

    return pos_encoding



class PositionalEncoder(nn.Module):
    '''
    Module that scales its input by sqrt(embedding_dim) and adds a fixed
    (non-trainable) positional encoding to it.

    Arguments
    ---------
        sequence_length: integer
            this is the maximum length of the sequences; it sets the number of
            rows in the stored positional encoding.

        embedding_dim: integer
            this is the size of the embedding dimension.

        device: string or torch.device
            device the stored encoding is moved to at construction time. The
            move is only attempted when CUDA is available.
    '''

    def __init__(self, sequence_length, embedding_dim, device = 'cpu'):
        super().__init__()
        self.device = device
        self.m = sequence_length
        self.d = embedding_dim

        # Kept as a plain attribute (not a parameter or buffer) so the module's
        # state_dict is unchanged. from_numpy yields a float64 tensor.
        self.pos_encoding = torch.from_numpy(positional_encoding(sequence_length, embedding_dim))
        if torch.cuda.is_available():
            self.pos_encoding = self.pos_encoding.to(self.device)

    def forward(self,X):
        '''
        Arguments
        ---------
            X: tensor
                input whose dimension 1 is the sequence length and whose last
                dimension is embedding_dim. The sequence length is expected to
                be at most the sequence_length given at construction.

        Returns
        --------
            X: tensor
                the scaled input plus the positional encoding, in the dtype
                and on the device of the scaled input.
        '''

        seq_len = X.size(1)
        X = X * float(np.sqrt(self.d))

        # Match the input's dtype and device: the stored table is float64, and
        # adding it directly would silently promote float32 inputs to float64
        # (or fail if the table sits on another device). No copy is made when
        # both already match.
        encoding = self.pos_encoding[:seq_len,:].to(device = X.device, dtype = X.dtype)

        return X + encoding
