%Used to produce the Results section of paper
%MVCE calculation requires Michael Todd's code minvol.m, which is available
%at https://epubs.siam.org/doi/10.1137/1.9781611974386#supplementary-material, along with its subroutines

%The rotated Cauchy dataset requires Michael Todd's code rot_cauchy.m,
%which can be found in Minimum-Volume Ellipsoids: Theory and Algorithms.
%If rot_cauchy.m is unavailable, set rotcauchy_choice = 2 to use the
%alternative generator in this script (however, that dataset will differ
%from the one in the paper).
%-------------------------------------------------------------------------- 
%--------------------------------------------------------------------------
%Start from an empty workspace so variables from earlier runs cannot leak in
clear;

%INPUTS
%NOTE: with the defaults below a d-by-n double matrix has 1e9 entries
%(about 8 GB), and the leverage-score step builds a second array of the
%same size.
tol = 1e-9;             %tolerance passed to minvol (MVCE algorithm)
n = 1e7;                %number of data points (columns of X)
d = 100;                %dimension (rows of X)
percent = 0.1:0.1:10;   %sample sizes as percentages of n: 0.1, 0.2, ..., 10
s = (percent./100)*n;   %sample sizes as point counts (rounded with round() before use)

%Sampling schemes to run: 1 = on, anything else = off
deterministic = 1;      %keep the s points with the largest leverage scores
random = 0;             %sample s points without replacement, weighted by leverage score
uniform = 0;            %sample s points uniformly at random
full = 0;               %MVCE of the whole dataset; g_star and timeF are only computed when this is 1

dataset = 1;            %1 = rotated cauchy, 2 = lognormal, any other value = gaussian
rotcauchy_choice = 2;   %Only required if dataset == 1
                        %1 = call rot_cauchy.m, any other value = built-in substitute
                        %(use when rot_cauchy.m is unavailable)
appD_choice = 0;        %1 = also compute the Appendix D quantity delta for each sample
%--------------------------------------------------------------------------
%--------------------------------------------------------------------------
%Build the data matrix X (d rows, one data point per column)
rnd = 1;        %seed: fixes the random stream and is also handed to rot_cauchy
rng(rnd);
switch dataset
    case 1      %Rotated Cauchy
        if rotcauchy_choice ~= 1
            %Substitute generator for when rot_cauchy.m is not available
            scale = trnd(1,1,n);            %n Cauchy draws (Student t, 1 degree of freedom)
            Y = randn(d,n);
            Y = Y./vecnorm(Y);              %rescale every column to unit length
            X = scale.*Y;                   %column i ends up at distance |scale(i)| from the origin
        else
            X = rot_cauchy(d,n,rnd,1);
        end
    case 2      %Lognormal
        X = lognrnd(0,3,d,n);
    otherwise   %Gaussian (also reached for any unrecognised dataset value)
        X = randn(d,n);
end

%Preallocate one slot per sample size for every sampling scheme
%(suffix D = deterministic, R = random, U = uniform)
sz = length(s);
[timeD, timeR, timeU] = deal(zeros(1,sz));              %CPU times
[gs_starD, gs_starR, gs_starU] = deal(zeros(1,sz));     %log det of the sampled MVCE matrix
[deltaD, deltaR, deltaU] = deal(zeros(1,sz));           %Appendix D delta values

%Reference solution: MVCE of the complete dataset (skipped unless full == 1)
if full == 1
    st = cputime;
    [~,R,factor,~,~,~,~,~,~,~,~,~,~] = minvol(X,tol);
    timeF = cputime - st;           %CPU time of the full solve only

    %g_star = log det(XUX), with XUX rebuilt from the rescaled factor R
    %(the 1/sqrt(factor) rescaling presumably undoes a scaling applied
    %inside minvol -- confirm against minvol.m)
    R = R/sqrt(factor);
    XUX = R'*R;
    g_star = log(det(XUX));

    %If the determinant came back infinite, retry on XUX/10^c and add back
    %the c*d*log(10) removed by the scaling; give up after 100 attempts
    c = 0;
    while true
        if ~isinf(g_star) || c >= 100
            break
        end
        c = c + 1;
        XUXsmall = XUX./(10^c);
        g_star = log(det(XUXsmall)) + c*d*log(10);
    end
end

%Calculate leverage scores
%distance(i) = x_i' * inv(X*X') * x_i for every column x_i of X. It is
%obtained by solving with the transposed Cholesky factor instead of forming
%an explicit inverse. timelev records the CPU time of this step so it can
%be added to the timings of the leverage-based sampling schemes.
st = cputime;
A = X*X';                                   %d-by-d when X is d-by-n
A_fact = chol(A);                           %A_fact'*A_fact = A; chol errors if A is not positive definite
dis_tmp = A_fact'\X;                        %same size as X
distance = sum( dis_tmp .* dis_tmp, 1);     %squared column norms of dis_tmp (row vector)
timelev = cputime - st;

%Calculate MVCEs using sampled datasets
%For each sample size s(j), every enabled scheme selects sj columns of X,
%runs minvol on them, and stores the CPU time and the log-determinant of
%the resulting matrix. The (rescaled) factor R is presumably minvol's
%factor of the MVCE shape matrix -- confirm against minvol.m.
for j = 1:sz
    sj = round(s(j));               %sample sizes must be whole numbers

    %------------------- deterministic leverage sampling ------------------
    if deterministic == 1
        st = cputime;

        %The sj columns with the largest leverage scores form the sample A
        [~,sampleInd] = maxk(distance,sj);
        A = X(:,sampleInd);

        %MVCE of the sample
        [~,R,factor,~,~,~,~,~,~,~,~,~,~] = minvol(A,tol);

        %This scheme needs the leverage scores, so their cost is charged too
        timeD(j) = timelev + (cputime - st);

        %gs_star = log det(XUX) with XUX rebuilt from the rescaled factor
        R = R/sqrt(factor);
        XUX = R'*R;
        gs_starD(j) = log(det(XUX));

        %Infinite result: retry on XUX/10^c and add back c*d*log(10)
        %(at most 20 attempts)
        c = 0;
        while c < 20 && isinf(gs_starD(j))
            c = c + 1;
            XUXsmall = XUX./(10^c);
            gs_starD(j) = log(det(XUXsmall)) + c*d*log(10);
        end

        if appD_choice == 1
            %delta = max{ d(u,xi) - d } / d, taken over every column of X
            dis_tmp = R'\X;
            dist = sum( dis_tmp .* dis_tmp, 1);
            deltaD(j) = max(dist - d)/d;
        end

        %Progress report (j is echoed on purpose)
        disp('deterministic done')
        j
    end

    %--------------------- random leverage sampling -----------------------
    if random == 1
        st = cputime;

        %Draw sj distinct indices with weights given by the leverage scores
        pop = 1:n;
        sampleInd = datasample(pop, sj, 'Weights', distance, 'Replace', false);
        A = X(:,sampleInd);

        %MVCE of the sample
        [~,R,factor,~,~,~,~,~,~,~,~,~,~] = minvol(A,tol);

        %This scheme needs the leverage scores, so their cost is charged too
        timeR(j) = timelev + (cputime - st);

        %gs_star = log det(XUX) with XUX rebuilt from the rescaled factor
        R = R/sqrt(factor);
        XUX = R'*R;
        gs_starR(j) = log(det(XUX));

        %Infinite result: retry on XUX/10^c and add back c*d*log(10)
        %(at most 20 attempts)
        c = 0;
        while c < 20 && isinf(gs_starR(j))
            c = c + 1;
            XUXsmall = XUX./(10^c);
            gs_starR(j) = log(det(XUXsmall)) + c*d*log(10);
        end

        if appD_choice == 1
            %delta = max{ d(u,xi) - d } / d, taken over every column of X
            dis_tmp = R'\X;
            dist = sum( dis_tmp .* dis_tmp, 1);
            deltaR(j) = max(dist - d)/d;
        end

        %Progress report (j is echoed on purpose)
        disp('probabilistic done')
        j
    end

    %-------------------------- uniform sampling --------------------------
    if uniform == 1
        st = cputime;

        %Draw sj indices from 1..n with equal probability
        sampleInd = randsample(n,sj)';
        A = X(:,sampleInd);

        %MVCE of the sample
        [~,R,factor,~,~,~,~,~,~,~,~,~,~] = minvol(A,tol);

        %No leverage scores are used here, so timelev is not added
        timeU(j) = cputime - st;

        %gs_star = log det(XUX) with XUX rebuilt from the rescaled factor
        R = R/sqrt(factor);
        XUX = R'*R;
        gs_starU(j) = log(det(XUX));

        %Infinite result: retry on XUX/10^c and add back c*d*log(10)
        %(at most 20 attempts)
        c = 0;
        while c < 20 && isinf(gs_starU(j))
            c = c + 1;
            XUXsmall = XUX./(10^c);
            gs_starU(j) = log(det(XUXsmall)) + c*d*log(10);
        end

        if appD_choice == 1
            %delta = max{ d(u,xi) - d } / d, taken over every column of X
            dis_tmp = R'\X;
            dist = sum( dis_tmp .* dis_tmp, 1);
            deltaU(j) = max(dist - d)/d;
        end

        %Progress report (j is echoed on purpose)
        disp('uniform done')
        j
    end
end

%Matrices for Section 7
%diff: g_star - gs_star per sample size, one row per scheme
%      (deterministic; random; uniform)
%time: CPU time per sample size, rows = deterministic; random; uniform;
%      full MVCE (the single value timeF repeated across all columns)
%
%g_star and timeF are only assigned when the full MVCE was computed
%(full == 1). Without a fallback the script stops here with an
%undefined-variable error whenever full is switched off, after all the
%sampling work has already been done. In that case the reference quantities
%are set to NaN, so diff and the last row of time come out as NaN.
if full ~= 1
    g_star = NaN;
    timeF = NaN;
end

diffD = g_star - gs_starD(1,:); diffR = g_star - gs_starR(1,:); diffU = g_star - gs_starU(1,:);

%NOTE(review): the variable name diff shadows MATLAB's built-in diff function
%for as long as it stays in the workspace; the name is kept in case
%downstream plotting code reads it.
diff = [diffD; diffR; diffU];
time = [timeD; timeR; timeU; timeF*ones(1,sz)];

