%Used to produce results in Appendix E (Real World Data). 
%(Note, we repeat the experiments 5 times to obtain mean and standard deviation.)

%MVCE calculation requires Michael Todd's code minvol.m, which is available
%at https://epubs.siam.org/doi/10.1137/1.9781611974386#supplementary-material, 
%along with its subroutines.
%In minvol.m, delete the output argument 'time' from the function signature,
%as it is never assigned during a call to minvol.
%Also, in minvol.m, comment out the section that draws the ellipse when d = 2
%(lines 106 - 121 of minvol.m).

%Datasets can be downloaded from the UCI Machine Learning Repository (see 
%the README).

%Download the datasets into the "supplementary" folder, so that this code 
%can access them. Do not change the file names, otherwise the code will 
%need to be modified accordingly.
%-------------------------------------------------------------------------- 
%--------------------------------------------------------------------------
clear;

%INPUTS
tol = 1e-9;             %tolerance passed to minvol (MVCE algorithm)
percent = [1 5 10];     %sample sizes, given as percentages of n

deterministic = 1;      %turn on/off deterministic leverage sampling
random = 1;             %turn on/off random leverage sampling
uniform = 1;            %turn on/off uniform sampling
full = 1;               %turn on/off full MVCE calculation

dataset = 1;            %1 = ethylene CO, 2 = ethylene CH4, 3 = skin

%----------------------------------------------------------------------
%----------------------------------------------------------------------

%Load the selected dataset. The transpose stores one datapoint per column.
switch dataset
    case 1
        X = readmatrix('ethylene_CO.txt')';
    case 2
        X = readmatrix('ethylene_methane.txt')';
    case 3
        X = readmatrix('Skin_NonSkin.txt')';
    otherwise
        %An unrecognised selector used to fall through to the skin data
        %without any notice; fail loudly instead.
        error('Unknown dataset selector %g: use 1 (ethylene CO), 2 (ethylene CH4) or 3 (skin).', dataset);
end

n = size(X,2);          %number of datapoints (columns of X)
d = size(X,1);          %dimension (rows of X)
s = (percent./100)*n;   %sample sizes; may be fractional, rounded where used

%Initialise result vectors, one entry per percentage
%(D = deterministic, R = random, U = uniform sampling)
sz = length(s);
timeD = zeros(1,sz); timeR = zeros(1,sz); timeU = zeros(1,sz);
gs_starD = zeros(1,sz); gs_starR = zeros(1,sz); gs_starU = zeros(1,sz);

%Solve MVCE(X) on the full dataset
if full == 1
    st = cputime;
    [~,R,factor,~,~,~,~,~,~,~,~] = minvol(X,tol);
    timeF = cputime - st;   %CPU time of the minvol call only

    %Calculate g_star = log(det(R'*R/factor))
    R = R/sqrt(factor);
    XUX = R'*R;
    g_star = log(det(XUX));

    %det can overflow to Inf or underflow to 0, making the log +/-Inf.
    %Rescale XUX by powers of 10 in the direction that brings det back
    %into range, then undo the scaling in log space using
    %log(det(XUX)) = log(det(XUX/t)) + m*log(t) for an m-by-m matrix.
    %(Only the overflow direction was handled before; an underflow was
    %scaled further down and could never recover.)
    if isinf(g_star)
        m = size(XUX,1);        %actual matrix size (expected to equal d)
        dirn = sign(g_star);    %+1: overflow, shrink; -1: underflow, grow
        c = 0;
        while isinf(g_star) && c < 100
            c = c + 1;
            XUXsmall = XUX./(10^(dirn*c));
            g_star = log(det(XUXsmall)) + dirn*c*m*log(10);
        end
    end
    if ~isfinite(g_star)
        warning('g_star is not finite; log(det(XUX)) could not be evaluated.');
    end
else
    g_star = 0; timeF = 0;  %placeholders to avoid error message
end

%Calculate leverage scores: distance(i) = x_i' * inv(X*X') * x_i, where x_i
%is the i-th column of X. The time taken is recorded in timelev.
st = cputime;
cholFactor = chol(X*X');            %upper triangular, cholFactor'*cholFactor = X*X'
whitened = cholFactor'\X;           %solve cholFactor' * whitened = X
distance = sum( whitened .* whitened, 1);   %squared column norms
timelev = cputime - st;

%Calculate MVCEs using sampled datasets
%For each percentage, every enabled sampling scheme picks sj columns of X,
%runs minvol on them, and records CPU time and the log-determinant value.
for j = 1:sz
    sj = round(s(j));       %sample size for the j-th percentage

    %Scaled matrix R'*R/factor for each scheme that runs:
    %1 = deterministic, 2 = random, 3 = uniform (left empty if switched off)
    grams = cell(1,3);

    if deterministic == 1
        st = cputime;

        %Keep the sj points with the largest leverage scores
        [~,sampleInd] = maxk(distance,sj);
        A = X(:,sampleInd);

        %Calculate MVCE(A) where A contains the selected points
        [~,R,factor,~,~,~,~,~,~,~,~] = minvol(A,tol);

        %Leverage-score time is included because this scheme needs the scores
        timeD(j) = timelev + (cputime - st);

        R = R/sqrt(factor);
        grams{1} = R'*R;
    end
    if random == 1
        %Sample with probability proportionate to leverage scores, assemble into A
        st = cputime;
        sampleInd = datasample(1:n, sj, 'Weights', distance, 'Replace', false);
        A = X(:,sampleInd);

        %Calculate MVCE(A) where A contains the selected points
        [~,R,factor,~,~,~,~,~,~,~,~] = minvol(A,tol);

        %Leverage-score time is included because this scheme needs the scores
        timeR(j) = timelev + (cputime - st);

        R = R/sqrt(factor);
        grams{2} = R'*R;
    end
    if uniform == 1
        %Sample with uniform probability, assemble into A
        %(uniform_sample2 is defined outside this file; its second output
        %was never used here, so it is discarded)
        st = cputime;
        [A,~] = uniform_sample2(sj,X);

        %Calculate MVCE(A) where A contains the selected points
        [~,R,factor,~,~,~,~,~,~,~,~] = minvol(A,tol);

        %Uniform sampling does not use the leverage scores, so timelev is
        %NOT added here (it used to be, which overstated this scheme's cost)
        timeU(j) = cputime - st;

        R = R/sqrt(factor);
        grams{3} = R'*R;
    end

    %Calculate gs_star = log(det(.)) once for every scheme that ran
    logdets = zeros(1,3);
    for k = 1:3
        if isempty(grams{k})
            continue;
        end
        G = grams{k};
        val = log(det(G));

        %det can overflow to Inf or underflow to 0, making the log +/-Inf.
        %Rescale G by powers of 10 in the direction that brings det back
        %into range and undo the scaling in log space using
        %log(det(G)) = log(det(G/t)) + m*log(t) for an m-by-m matrix.
        if isinf(val)
            m = size(G,1);      %actual matrix size (expected to equal d)
            dirn = sign(val);   %+1: overflow, shrink; -1: underflow, grow
            c = 0;
            while isinf(val) && c < 100
                c = c + 1;
                val = log(det(G./(10^(dirn*c)))) + dirn*c*m*log(10);
            end
        end
        logdets(k) = val;
    end

    %Store results; entries of disabled schemes keep their existing values
    if deterministic == 1
        gs_starD(j) = logdets(1);
    end
    if random == 1
        gs_starR(j) = logdets(2);
    end
    if uniform == 1
        gs_starU(j) = logdets(3);
    end
end

%----------------------------------------------------------------------
%----------------------------------------------------------------------

%Gap between the full-data value g_star and the value from each sampling
%scheme, one entry per percentage (scalar g_star expands across the vector).
diffD = g_star - gs_starD;
diffR = g_star - gs_starR;
diffU = g_star - gs_starU;

%Rows: deterministic, random, uniform, then g_star itself repeated.
%Note: the variable names 'diff' and 'time' are kept as the script's outputs;
%'diff' hides the built-in function of the same name while it exists.
diff = [diffD; diffR; diffU; repmat(g_star,1,sz)];

%Rows: deterministic, random, uniform, then the full-data time repeated.
time = [timeD; timeR; timeU; repmat(timeF,1,sz)];
