function [a_now, iter_mat, values_mat] = newton_raphson_cubic_vectorized(v1sq, v2sq, v1v2, m1, m2, m1m2, numNRiter)

  % Vectorized Newton-Raphson (NR) root search for a family of cubics.
  %
  % One cubic is solved per element of the input vectors; the cubic and
  % its derivative are evaluated by 'local_NR_for_a'. Every element
  % starts from its own 'v1v2' value and is iterated until two successive
  % iterates differ by at most TOL, or until 'numNRiter' iterations have
  % been performed.
  %
  % Inputs:
  % 'v1sq', 'v2sq': Vectors of the respective squared values.
  % 'v1v2': Vector of inner products (also the NR starting point).
  %         'v1sq', 'v2sq' and 'v1v2' are assumed to have the same
  %         length and orientation.
  % 'm1', 'm2': Scalars; correspond to sigma_{11}, sigma_{22}.
  % 'm1m2': Scalar; corresponds to sigma_{11}*sigma_{22}.
  % 'numNRiter': Maximum number of NR iterations to run.
  %
  % Outputs:
  % 'a_now': Approximation of the root for every element, clamped to
  %          [-sqrt(m1m2), sqrt(m1m2)]. Elements that did not converge
  %          (or whose iterate became NaN/Inf) are reset to their
  %          initial estimate 'v1v2' before clamping.
  % 'iter_mat': Row vector with the number of NR updates applied to
  %             each element.
  % 'values_mat': Array whose row 1 holds the initial estimates and
  %               whose row k+1 holds the iterates after the k-th
  %               update. Entries of elements that were no longer being
  %               iterated are NaN. It has max(11, numNRiter+1) rows
  %               (the 11-row minimum keeps the layout of the original
  %               fixed-size buffer) and one column per element.

  % TOL here is set to 0.0001 just to find the convergence rate.
  TOL = 0.0001;

  tot_size = length(v1sq);
  a_now = v1v2;
  init_est = v1v2;

  % Size the trace from the actual problem instead of a fixed 11x100
  % buffer, so any input length and any iteration budget is recorded
  % with NaN (not zero) padding.
  num_rows = max(11, ceil(numNRiter) + 1);
  values_mat = nan(num_rows, numel(v1v2));
  values_mat(1,:) = v1v2;
  iter_mat = zeros(1, tot_size);

  % Indexes still being iterated; an element is dropped once TOL is met.
  idx_to_go = 1:tot_size;

  iter = 0;

  % Terminate when the budget is used up or no element is left to update.
  while ~isempty(idx_to_go) && iter < numNRiter

    iter_mat(idx_to_go) = iter_mat(idx_to_go) + 1;
    iter = iter + 1;

    % Keep the previous iterates in the same orientation as the inputs
    % so that the convergence test below stays element-wise.
    prev_vals = a_now(idx_to_go);

    % Update step
    [ fx, dfx ] = local_NR_for_a( prev_vals, v1v2(idx_to_go), v1sq(idx_to_go), v2sq(idx_to_go), m1, m2, m1m2 );

    new_vals = prev_vals - fx./dfx;
    a_now(idx_to_go) = new_vals;
    values_mat(iter+1, idx_to_go) = new_vals;

    % Check which points to still iterate on
    idx_to_go = idx_to_go(abs(new_vals - prev_vals) > TOL);

  end

  % In case there is no convergence, we set this to the initial estimate.
  a_now(idx_to_go) = init_est(idx_to_go);

  % A zero derivative (or overflow) produces NaN/Inf iterates; NaN fails
  % the '> TOL' test and would otherwise be reported as converged.
  % Treat such elements as non-converged as well.
  not_finite = ~isfinite(a_now);
  a_now(not_finite) = init_est(not_finite);

  % If there is convergence past the inner product bounds (via cosine
  % rule), then set the MLE estimate to be the upper (or corresponding
  % lower) bound.
  bound = sqrt(m1m2);
  a_now(a_now > bound) = bound;
  a_now(a_now < -bound) = -bound;
end

function [ fx, dfx ] = local_NR_for_a( a_now, v1v2, v1sq, v2sq, m1, m2, m1m2 )

  % Evaluate the monic cubic and its first derivative at 'a_now'.
  %
  % The cubic is
  %   f(a) = a^3 - v1v2*a^2 + (-m1m2 + v2sq*m1 + v1sq*m2)*a - v1v2*m1m2
  % and is evaluated element-wise over 'a_now'.
  %
  % 'fx':  f(a_now)
  % 'dfx': f'(a_now) = 3*a_now^2 - 2*v1v2*a_now + (-m1m2 + v2sq*m1 + v1sq*m2)

  % Coefficients of the quadratic, linear and constant terms
  % (the cubic term has coefficient 1).
  quad_coef  = -v1v2;
  lin_coef   = -m1m2 + v2sq*m1 + v1sq*m2;
  const_coef = -v1v2*m1m2;

  % Powers of the evaluation point, shared by both outputs.
  a_sq = a_now.^2;
  a_cu = a_now.^3;

  dfx = 3 .* a_sq + 2 * (a_now .* quad_coef) + lin_coef;
  fx  = a_cu + quad_coef .* a_sq + a_now .* lin_coef + const_coef;

end
