function [P, r_new, k] = cleanup_csgo_optimized(A, B, P_old, iter, r_old, S_rnd, tol)
%CLEANUP_CSGO_OPTIMIZED Iterative refinement of a matching matrix followed by rounding.
%
%   [P, r_new, k] = cleanup_csgo_optimized(A, B, P_old, iter, r_old, S_rnd, tol)
%
%   Each iteration computes Grad = A * P_old * B, solves a linear assignment
%   problem on -Grad with MATCHPAIRS to get a 0/1 matrix D, and moves from
%   P_old towards D with a step size alpha chosen from two scalar
%   coefficients (see the comments in the loop). After the loop the last
%   iterate is rounded to a 0/1 matrix with one more assignment solve.
%
%   Inputs
%     A, B   - matrices multiplied as A * P * B. n is taken from size(A, 1)
%              and all n-by-n work matrices use it.
%              NOTE(review): presumably both are n-by-n -- confirm with caller.
%     P_old  - starting iterate.
%     iter   - maximum number of iterations. Values below 1 skip the loop and
%              round P_old directly.
%     r_old  - reference value for the first convergence comparison.
%     S_rnd  - matrix multiplied element-wise with the final P to produce the
%              returned score.
%     tol    - the loop stops once abs(r_new - r_old) < tol.
%
%   Outputs
%     P      - n-by-n 0/1 matrix built from the final MATCHPAIRS result.
%     r_new  - full(sum(sum(S_rnd .* P)) / n). Note that this overwrites the
%              relative-change value used inside the loop.
%     k      - index of the iteration at which the loop ended (0 if the loop
%              did not run).

    n = size(A, 1);

    % Seed the iterate so the final rounding is well defined even when the
    % loop body never executes (iter < 1).
    P_new = P_old;

    for k = 1:iter
        Grad = A * P_old * B;

        % 0/1 matrix of the assignment that minimises sum(-Grad), i.e.
        % maximises the sum of the selected Grad entries.
        D = local_assignment_matrix(Grad, n);

        Delta = D - P_old;
        GradDelta = A * Delta * B;

        % trace(X' * Y) == sum(sum(X .* Y))
        a = 0.5 * sum(sum(Delta .* GradDelta));
        b = sum(sum(P_old .* GradDelta));

        % NOTE(review): a and b look like the quadratic and linear
        % coefficients of the objective along P_old + alpha * Delta --
        % confirm against the objective definition used by the caller.
        % The step is capped at 1 but is not clamped below at 0.
        if a >= 0
            alpha = 1;
        else
            alpha = min(1, -b / (2 * a));
        end

        P_new = P_old + alpha * Delta;

        % Relative change of the iterate (default matrix norm); stop once
        % it stagnates from one iteration to the next.
        r_new = norm(P_new - P_old) / norm(P_old);
        if abs(r_new - r_old) < tol
            break
        end
        r_old = r_new;
        P_old = P_new;
    end

    % An empty loop range leaves k as an empty array; report zero iterations.
    if isempty(k)
        k = 0;
    end

    % Round the last iterate to a 0/1 matrix.
    P = local_assignment_matrix(A * P_new * B, n);

    r_new = full(sum(sum(S_rnd .* P)) / n);
end

function M = local_assignment_matrix(Score, n)
% Build the n-by-n 0/1 matrix with ones at the (row, col) pairs that
% MATCHPAIRS returns for the cost matrix -Score (unmatched cost 1e10).
    pairs = matchpairs(-Score, 1e10);
    M = zeros(n);
    M(sub2ind([n, n], pairs(:, 1), pairs(:, 2))) = 1;
end