function nn = nnff2(nn, x, y, l)
%NNFF2 performs a feedforward pass with an implicitly-defined third layer
% nn = nnff2(nn, x, y, l) returns a neural network structure with updated
% layer activations, error and loss (nn.a, nn.e and nn.L).
%
%   nn : network structure. Fields read here: n, size, W,
%        activation_function, output, weightPenaltyL1, dropoutFraction,
%        testing, nonSparsityPenalty and (when sparsity is enabled) p.
%   x  : input samples, one row per sample.
%   y  : targets, one row per sample.
%   l  : value echoed with disp() while the per-sample solver loop runs;
%        it is not used in any computation.
%
% Layer 2 is a regular affine map followed by the activation function.
% Layer 3 is obtained per sample by solving a nonlinear system with fsolve
% (residual functions root7d / root6d, defined elsewhere) and applying the
% activation function to the solution.

    n = nn.n;
    m = size(x, 1);

    % prepend the bias column to the inputs
    x = [ones(m,1) x];
    nn.a{1} = x;

    %feedforward pass
    for i = 2 : n-1
        switch nn.activation_function
            case 'sigm'
                if i == 2
                    nn.a{i} = sigm(nn.a{i - 1} * nn.W{i - 1}');
                end
                if i == 3
                    nn.a{i} = zeros(m, nn.size(3));

                    % loop-invariant: weights and solver options
                    ww = nn.W{i - 1};
                    options = optimset('TolFun', 0.0001);

                    for k = 1:m
                        pp = nn.a{i - 1}(k,:);

                        % initial guess: previous-layer row without its bias entry
                        z_G = pp(:,2:end);
                        z = fsolve(@(xx)root7d(xx,pp,ww), z_G, options);

                        % progress trace
                        disp(k);
                        disp(i);
                        disp(l);

                        nn.a{i}(k,:) = sigm(z);
                    end

                    % NOTE(review): adds an L1 weight term to the activations
                    % themselves (only for layer 3 in this branch) - confirm intent.
                    nn.a{i} = nn.a{i} + nn.weightPenaltyL1 * sum(abs(nn.W{i - 1}'));
                end

            case 'tanh_opt'
                if i == 2
                    nn.a{i} = tanh_opt(nn.a{i - 1} * nn.W{i - 1}');
                end
                if i == 3
                    % NOTE(review): width is hard-coded to 10 here, whereas the
                    % 'sigm' branch uses nn.size(3) - confirm against root6d.
                    nn.a{i} = zeros(m,10);

                    % loop-invariant: weights and solver options
                    ww = nn.W{i - 1};
                    options = optimset('TolFun', 0.0001);

                    for k = 1:m
                        pp = nn.a{i - 1}(k,:);

                        % initial guess: previous-layer row without its bias entry
                        z_G = pp(:,2:end);
                        z = fsolve(@(xx)root6d(xx,pp,ww), z_G, options);
                        nn.a{i}(k,:) = tanh_opt(z);

                        % progress trace
                        disp(k);
                        disp(i);
                        disp(l);
                    end
                end

                % NOTE(review): unlike the 'sigm' branch, this term is added for
                % every hidden layer, not only layer 3 - confirm intent.
                nn.a{i} = nn.a{i} + nn.weightPenaltyL1 * sum(abs(nn.W{i - 1}'));
        end

        %dropout
        if(nn.dropoutFraction > 0)
            if(nn.testing)
                % at test time scale activations instead of masking them
                nn.a{i} = nn.a{i}.*(1 - nn.dropoutFraction);
            else
                % one uniform sample per activation; the mask must have the
                % same shape as nn.a{i}, so pass its size to rand
                nn.dropOutMask{i} = (rand(size(nn.a{i}))>nn.dropoutFraction);
                nn.a{i} = nn.a{i}.*nn.dropOutMask{i};
            end
        end

        %calculate running exponential activations for use with sparsity
        if(nn.nonSparsityPenalty>0)
            nn.p{i} = 0.99 * nn.p{i} + 0.01 * mean(nn.a{i}, 1);
        end

        % After layer 3 is computed, stash the (bias-augmented) layer-2
        % activations in nn.p{2} and replace nn.a{2} with the bias-augmented
        % layer-3 activations.
        % NOTE(review): this overwrites the running sparsity average stored in
        % nn.p{2} when nonSparsityPenalty > 0 - confirm this is intended.
        if i == 3
            nn.p{i - 1} = nn.a{i - 1};
            nn.a{i - 1} = [ones(m,1) nn.a{i}];
        end

        %Add the bias term
        nn.a{i} = [ones(m,1) nn.a{i}];

    end

    % output layer
    switch nn.output
        case 'sigm'
            nn.a{n} = sigm(nn.a{n - 1} * nn.W{n - 1}');
        case 'linear'
            nn.a{n} = nn.a{n - 1} * nn.W{n - 1}';
        case 'softmax'
            nn.a{n} = nn.a{n - 1} * nn.W{n - 1}';
            % subtract the row-wise maximum before exponentiating
            nn.a{n} = exp(bsxfun(@minus, nn.a{n}, max(nn.a{n},[],2)));
            nn.a{n} = bsxfun(@rdivide, nn.a{n}, sum(nn.a{n}, 2));
    end

    %error and loss
    nn.e = y - nn.a{n};

    switch nn.output
        case {'sigm', 'linear'}
            nn.L = 1/2 * sum(sum(nn.e .^ 2)) / m;
        case 'softmax'
            nn.L = -sum(sum(y .* log(nn.a{n}))) / m;
    end
end
