import numpy as np

class Mish():
    """
    Mish activation: f(x) = x * tanh(softplus(x)).

    smooth, non-monotonic, y-range approximately [-0.31, inf) (bounded below,
    unbounded above), vanishing gradient for x<<0 with exponential decay,
    computationally expensive compared to piecewise-linear activations.

    Both methods are static and operate element-wise on scalars or
    array-likes accepted by NumPy ufuncs.
    """
    @staticmethod
    def f(x):
        """
        Evaluate Mish element-wise.

        softplus is computed as logaddexp(0, x) = log(exp(0) + exp(x)) rather
        than log1p(exp(x)), so that large positive x does not overflow in exp.
        """
        return x * np.tanh(np.logaddexp(0, x)) # x * tanh(SoftPlus(x))
    @staticmethod
    def df(x, order=1):
        """
        Evaluate the derivative of Mish element-wise.

        Only the first derivative is available:
            f'(x) = tanh(sp) + x * (1 - tanh(sp)**2) * sigmoid(x),  sp = softplus(x)

        Raises:
            NotImplementedError: if ``order`` is anything other than 1.
        """
        # Reject unsupported orders before doing any numerical work.
        if order != 1:
            raise NotImplementedError(f"Mish gradient order {order} is not implemented yet")
        tanh_sp_x = np.tanh(np.logaddexp(0, x))
        # sigmoid(x) = 1 / (1 + exp(-x)) = exp(-softplus(-x)); this form never
        # feeds a large positive argument to exp, so it cannot overflow.
        sigmoid_x = np.exp(-np.logaddexp(0, np.negative(x)))
        return tanh_sp_x + x * (1 - tanh_sp_x**2) * sigmoid_x
