import numpy as np

class GELU:
    """
    Gaussian Error Linear Unit, tanh approximation.

    f(x) = 0.5 * x * (1 + tanh(sqrt(2 / pi) * (x + 0.044715 * x**3)))

    Properties: smooth; output is bounded below (minimum is roughly -0.17)
    and unbounded above; the gradient vanishes for x << 0; comparatively
    expensive to compute because of the tanh evaluation.
    """

    # Constants of the tanh approximation, shared by f and df.
    _SQRT_2_OVER_PI = np.sqrt(2 / np.pi)
    _CUBIC_COEFF = 0.044715

    @staticmethod
    def _as_inexact(x):
        """Return x as an ndarray, promoting non-float input to float64.

        Integer (and bool) arrays must be promoted before x**3 is taken:
        NumPy integer arithmetic wraps silently on overflow, which would
        corrupt the tanh argument for large magnitudes. Floating and complex
        inputs keep their dtype.
        """
        arr = np.asarray(x)
        if not np.issubdtype(arr.dtype, np.inexact):
            arr = arr.astype(np.float64)
        return arr

    @staticmethod
    def _tanh_term(x):
        """Return tanh(sqrt(2 / pi) * (x + 0.044715 * x**3)) for float x."""
        return np.tanh(GELU._SQRT_2_OVER_PI * (x + GELU._CUBIC_COEFF * x**3))

    @staticmethod
    def f(x):
        """Evaluate the GELU activation element-wise.

        Args:
            x: scalar, ndarray or array-like (list/tuple) of numbers.

        Returns:
            GELU(x) with the same shape as x (a NumPy scalar for scalar input).
        """
        x = GELU._as_inexact(x)
        return 0.5 * x * (1 + GELU._tanh_term(x))

    @staticmethod
    def df(x, order=1):
        """Evaluate the derivative of the GELU activation element-wise.

        Args:
            x: scalar, ndarray or array-like (list/tuple) of numbers.
            order: derivative order; only 1 is supported.

        Returns:
            d/dx GELU(x) with the same shape as x.

        Raises:
            NotImplementedError: if order is anything other than 1.
        """
        # Reject unsupported orders before doing any numeric work.
        if order != 1:
            raise NotImplementedError(f"GELU gradient order {order} is not implemented yet")

        x = GELU._as_inexact(x)
        tanh_part = GELU._tanh_term(x)
        # Derivative of the tanh argument: c * (1 + 3 * 0.044715 * x**2).
        inner_grad = GELU._SQRT_2_OVER_PI * (1 + 3 * GELU._CUBIC_COEFF * x**2)
        # Product rule: d/dx [0.5 * x * (1 + tanh(u))], with sech^2 = 1 - tanh^2.
        return 0.5 * (1 + tanh_part) + 0.5 * x * (1 - tanh_part**2) * inner_grad
