classdef nnExpLayer < nnActivationLayer
% nnExpLayer - class for exponential layers
%
% Description:
%    Activation layer whose numeric evaluation applies exp element-wise to
%    its input. In addition to the behavior inherited from
%    nnActivationLayer, it provides derivative bounds for exp and a custom
%    linear enclosure of exp on an interval [l,u] following [1].
%
% Syntax:
%    obj = nnExpLayer(name)
%
% Inputs:
%    name - name of the layer, defaults to type
%
% Outputs:
%    obj - generated object
%
% References:
%    [1] Bonaert et al. "Fast and Precise Certification of Transformers",
%    2021
%
% Other m-files required: none
% Subfunctions: none
% MAT-files required: none
%
% See also: neuralNetwork

% Authors:       Tobias Ladner
% Written:       20-June-2024
% Last update:   ---
% Last revision: ---

% ------------------------------ BEGIN CODE -------------------------------

methods
    % constructor
    function obj = nnExpLayer(name)
        % name is optional; an empty name is forwarded to the super class
        % if it is omitted
        if nargin < 1
            name = [];
        end
        % call super class constructor
        obj@nnActivationLayer(name)
    end

    function [df_l, df_u] = getDerBounds(obj, l, u)
        % getDerBounds - bounds of the derivative of exp on [l,u]
        %
        % Inputs:
        %    l - lower bound of the input domain
        %    u - upper bound of the input domain
        %
        % Outputs:
        %    df_l - lower bound of the derivative
        %    df_u - upper bound of the derivative
        %
        % The derivative of exp is exp itself, which is monotonically
        % increasing, so the bounds are attained at the interval ends.
        df_l = exp(l);
        df_u = exp(u);
    end

    % public method to test computeApproxPolyCustom
    function [coeffs, d] = testComputeApproxPolyCustom(obj, l, u, order, poly_method)
        % forwards all arguments unchanged to the protected method
        % computeApproxPolyCustom and returns its outputs
        [coeffs, d] = obj.computeApproxPolyCustom(l, u, order, poly_method);
    end
end

% evaluate ----------------------------------------------------------------

methods  (Access = {?nnLayer, ?neuralNetwork})
    % interval
    function bounds = evaluateInterval(obj, bounds, options)
        % delegates to the implementation of the super class
        bounds = evaluateInterval@nnActivationLayer(obj, bounds, options);
    end

    % numeric
    function [r, obj] = evaluateNumeric(obj, input, options)
        % element-wise exponential of the input; options is not used and
        % obj is returned unchanged
        r = exp(input);
    end
end

methods (Access=protected)
    function [coeffs, d] = computeApproxPolyCustom(obj, l, u, order, poly_method)
        % computeApproxPolyCustom - linear enclosure of exp on [l,u]
        %    according to [1, Section 4.5]
        %
        % Inputs:
        %    l - lower bound of the input domain
        %    u - upper bound of the input domain
        %    order - polynomial order; only order 1 is handled here
        %    poly_method - approximation method; only 'singh' is handled
        %
        % Outputs:
        %    coeffs - [lambda, mu], i.e., the line lambda*x + mu
        %    d - approximation error beta (half the vertical distance
        %        between the two parallel bounding lines)
        %
        % For any other combination of order and poly_method, coeffs and d
        % are returned empty.
        % NOTE(review): presumably the caller falls back to a generic
        % approximation in that case - confirm in nnActivationLayer.
        %
        % NOTE(review): the arithmetic uses the matrix operators '/' and
        % '*', so l and u are expected to be scalars - confirm with callers.

        coeffs = []; d = [];

        if strcmp(poly_method, 'singh')
            if order == 1
                % should always be a small positive constant value
                epsilon_hat = 0.01;

                % t_1: point at which the tangent of exp is parallel to
                % the secant through (l,exp(l)) and (u,exp(u))
                t_1 = log((exp(u) - exp(l)) / (u - l));

                % By the mean value theorem, t_1 lies in [l,u]; enforce
                % this numerically. This also repairs the degenerate
                % cases: for u == l the quotient is 0/0 = NaN, and for a
                % tiny u - l the numerator can round to 0 such that
                % t_1 = -Inf (which would yield lambda*t_opt = 0*(-Inf)
                % = NaN below). max/min ignore NaN operands by default,
                % so t_1 becomes l in both cases.
                t_1 = min(max(t_1, l), u);

                % t_2: largest tangent point (minus the margin
                % epsilon_hat) for which the tangent is still positive at l
                t_2 = l + 1 - epsilon_hat;

                % t_opt is min of t_1 and t_2; ensures that the
                % approximation is positive
                t_opt = min(t_1, t_2);

                % slope of both bounding lines
                lambda = exp(t_opt);

                % lower line: tangent at t_opt with offset
                %    exp(t_opt) - lambda*t_opt
                % upper line: parallel line through (u,exp(u)) with offset
                %    exp(u) - lambda*u
                % mu is the mean of both offsets, beta half their
                % difference
                mu = 0.5 * (exp(t_opt) - lambda * t_opt + exp(u) - lambda * u);
                beta = 0.5 * (lambda * t_opt - exp(t_opt) + exp(u) - lambda * u);

                % set coeffs and approx error
                coeffs = [lambda, mu];
                d = beta;
            end
        end
    end
end

end

% ------------------------------ END OF CODE ------------------------------
