function doubly_stochastic_denoising()
% Find a doubly stochastic matrix closest to a given matrix, in Frobenius norm.
%
% This example demonstrates how to use the geometry factories for the
% doubly stochastic multinomial manifold:
%  multinomialdoublystochasticfactory and
%  multinomialsymmetricfactory (for the symmetric case.)
% 
% The file is based on developments in the research paper
% A. Douik and B. Hassibi, "Manifold Optimization Over the Set 
% of Doubly Stochastic Matrices: A Second-Order Geometry"
% ArXiv:1802.02628, 2018.
%
% Link to the paper: https://arxiv.org/abs/1802.02628.
%
% Please cite the Manopt paper as well as the research paper:
% @Techreport{Douik2018Manifold,
%   Title   = {Manifold Optimization Over the Set of Doubly Stochastic 
%              Matrices: {A} Second-Order Geometry},
%   Author  = {Douik, A. and Hassibi, B.},
%   Journal = {Arxiv preprint ArXiv:1802.02628},
%   Year    = {2018}
% }
% 
% This can be a starting point for many optimization problems of the form:
%
% minimize f(X) such that X is a doubly stochastic matrix (symmetric or not)
%
% Input:  None. This example file generates random data.
% 
% Output: None.
%
% This file is part of Manopt: www.manopt.org.
% Original author: Ahmed Douik, March 15, 2018.
% Contributors:
% Change log:
%
%    Xiaowen Jiang Aug. 31, 2021
%       Added AD to compute the egrad and the ehess  

    %% Generate input data
    n = 100;
    sigma = 1/n^2;
    % Generate a doubly stochastic matrix using the Sinkhorn algorithm
    B = doubly_stochastic(abs(randn(n, n))); 
    % Add noise to the matrix
    A = max(B + sigma*randn(n, n), 0.01);

    %% Setup an optimization problem for denoising
    
    % Manifold initialization: pick the symmetric or non-symmetric case
    symmetric_case = true;
    if ~symmetric_case
        manifold = multinomialdoublystochasticfactory(n);
    else
        % Input must also be symmetric (otherwhise, gradient must be adapted.)
        A = (A+A')/2;
        manifold = multinomialsymmetricfactory(n);
    end
    problem.M = manifold;
    
    % Specify cost function and derivatives
    problem.cost = @(X) 0.5*norm(A-X, 'fro')^2;
    problem.egrad = @(X) X - A;  % Euclidean gradient
    problem.ehess = @(X, U) U;   % Euclidean Hessian

    % An alternative way to compute the egrad and the ehess is to use 
    % automatic differentiation provided in the deep learning toolbox (slower).
    % Notice that the function norm is not supported for AD so far.
    % Replace the expression norm(..., 'fro')^2 with cnormsqfro described
    % in the file manoptADhelp.m.
    % problem.cost = @(X) 0.5*cnormsqfro(A-X);
    % Call manoptAD to prepare AD for the problem structure
    % problem = manoptAD(problem);
    
    % Check consistency of the gradient and the Hessian. Useful if you
    % adapt this example for a new cost function and you would like to make
    % sure there is no mistake. These checks are optional of course.
    warning('off', 'manopt:multinomialdoublystochasticfactory:exp');
    warning('off', 'manopt:multinomialsymmetricfactory:exp');
    figure();
    checkgradient(problem); % Check the gradient
    figure();
    checkhessian(problem);  % Check the hessian. This test usually fails
                            % due to the use of a first order retraction, 
                            % unless the test is performed at a critical point.
    
    %% Solve
    % A first order algorithm
    % Minimize the cost function using the Conjugate Gradients algorithm.
    [X1, ~, info, ~] = conjugategradient(problem); 
    S1 = [info.gradnorm] ; % Collecting the Gradient information
    
    % Computing the distance between the original, noiseless matrix and the
    % found solution
    fprintf('||X-B||_F^2 = %g\n', norm(X1 - B, 'fro')^2);

    % A second order algorithm
    % Minimize the cost function using the trust-regions method. 
    [X2, ~, info, ~] = trustregions(problem);                                     
    S2 = [info.gradnorm] ; % Collecting the Gradient information

    % Computing the distance between the original, noiseless matrix and the
    % found solution
    fprintf('||X-B||_F^2 = %g\n', norm(X2 - B, 'fro')^2);
    
    
    %% Display
    figure();
    semilogy(S1, 'Color', 'red', 'Marker', '*', 'LineWidth', 2, ...
                 'DisplayName', 'Conjugate Gradient Algorithm');
    hold on;
    semilogy(S2, 'Color', 'blue', 'Marker', '+', 'LineWidth', 2, ...
                 'DisplayName', 'Trust Regions Method');
    
    set(gca, 'YGrid', 'on', 'XGrid', 'on');
    xlabel('Number of Iteration', 'FontSize', 16);
    ylabel('Norm of Gradient', 'FontSize', 16);
    legend1 = legend('show');
    set(legend1, 'FontSize', 16);

    % This Hessian test is performed at a computed solution, which is
    % expected to be a critical point. Thus we expect the slope test to
    % succeed. It could fail if the solution X1 has entries very close to
    % zero, so that numerical issues might come up. This is because de
    % Fisher metric on the multinomial manifold involves division by the
    % entries of X.
    figure();
    checkhessian(problem, X1);
    
end
