% Part I: prepare the dataset

% Load the dataset variables into the current workspace.
% COMPAS.mat contains:
%   A  - double matrix of size 4115-by-16
%   b  - double vector of size 4115-by-1
%   Ac - double matrix of size 2057-by-16
%   bc - double vector of size 2057-by-1
load('COMPAS.mat');
                    
% The raw data, compas-scores-two-years.csv (7214 instances), is available at
% https://github.com/propublica/compas-analysis/.
% A subset of 6172 instances is selected from these 7214 instances, keeping
% only those for which the charge date of the defendant's COMPAS-scored crime
% is within 30 days of the date the person was arrested.
% Otherwise, we assume that, for data-quality reasons, we do not have the right offense.
% [A, b] and [Ac, bc] are obtained by splitting these 6172 instances
% (4115 + 2057 = 6172).

% i) [A, b] supplies the data used to build the objective.

[n, d] = size(A);            % n = 4115 instances, d = 16 features
numclass = numel(unique(b)); % b holds the class labels 1 and -1, hence numclass = 2

% The sensitive variable (sv) is the attribute with respect to which
% fairness is measured: sv = 1 means "male", sv = 0 means "female".
sv = A(:,16);
idmale = find(sv == 1);   % row indexes (within 1..n) of the "male" instances
idfemale = find(sv == 0); % row indexes (within 1..n) of the "female" instances

% Group sizes.
nummale = nnz(sv == 1);   % 3304 "male" instances
numfemale = nnz(sv == 0); % 811 "female" instances

% Label counts.
numpos = nnz(b == 1);  % 1899 instances carry the positive label 1
numneg = nnz(b == -1); % 2216 instances carry the negative label -1

% Label counts within each group.
numposmale = nnz(b(idmale) == 1);      % "male" with label 1: 1627
numnegmale = nnz(b(idmale) == -1);     % "male" with label -1: 1677
numposfemale = nnz(b(idfemale) == 1);  % "female" with label 1: 272
numnegfemale = nnz(b(idfemale) == -1); % "female" with label -1: 539

% ii) [Ac, bc] supplies the data used to build the constraint.

[nc, dc] = size(Ac);           % nc = 2057 instances, dc = 16 features
numclassc = numel(unique(bc)); % bc holds the class labels 1 and -1, hence numclassc = 2

% The sensitive variable of the constraint data (svc) is the attribute with
% respect to which fairness is measured: svc = 1 means "male", svc = 0 means "female".
svc = Ac(:,16);
idmalec = find(svc == 1);   % row indexes (within 1..nc) of the "male" instances
idfemalec = find(svc == 0); % row indexes (within 1..nc) of the "female" instances

% Group sizes.
nummalec = nnz(svc == 1);   % 1693 "male" instances
numfemalec = nnz(svc == 0); % 364 "female" instances

% Label counts.
numposc = nnz(bc == 1);  % 910 instances carry the positive label 1
numnegc = nnz(bc == -1); % 1147 instances carry the negative label -1

% Label counts within each group.
numposmalec = nnz(bc(idmalec) == 1);      % "male" with label 1: 769
numnegmalec = nnz(bc(idmalec) == -1);     % "male" with label -1: 924
numposfemalec = nnz(bc(idfemalec) == 1);  % "female" with label 1: 141
numnegfemalec = nnz(bc(idfemalec) == -1); % "female" with label -1: 223

% Part II: set the algorithm parameters and initialize w^(0) by zero vector

% beta: one quarter of the sum of the mean squared Euclidean row norms of Ac,
% taken separately over the "male" rows (idmalec) and the "female" rows (idfemalec).
beta = ( mean(vecnorm(Ac(idmalec,:),2,2).^2) + mean(vecnorm(Ac(idfemalec,:),2,2).^2) ) / 4;
% NOTE(review): presumably the coefficient of a SCAD regularizer used by the solver scripts -- confirm
SCAD_coefficient = 0.2;
% rho: the larger of beta and twice the SCAD coefficient.
% NOTE(review): presumably the weak-convexity parameter of the problem -- confirm
rho = max(beta, 2*SCAD_coefficient);
% rho_hat: max(rho, 1) times a trailing factor (currently 1); that factor is
% the value to tune.
rho_hat = max(rho, 1) * 1; % select from {1, 1.5, 2}

% M: the larger of
%   (a) the square root of the largest-magnitude eigenvalue of
%       (b.*A)' * (b.*A), divided by n, and
%   (b) the mean Euclidean row norm of Ac.
% NOTE(review): presumably a bound on (sub)gradient norms of the objective and
% constraint -- confirm against the solver scripts
M = max( sqrt( eigs( ((A').*b') * (b.*A), 1 ) ) / n, mean(vecnorm(Ac,2,2)) );

% NOTE(review): kappa is not used in this file; presumably the constraint
% level read by the solver scripts -- confirm
kappa = 0.02;

w_init = zeros(d,1); % initial point w^(0): the d-by-1 zero vector
% Disabled setting, kept for reference:
% D_X = 10;

% Disabled call, kept for reference (presumably a script that checks the
% choice of kappa -- confirm):
% verify_kappa

% Part III: solve the constrained problem by three methods
%
% Each solver below is invoked by name without arguments, and the workspace
% variable w is copied into a method-specific variable right after each call,
% so the order of these statements matters.
% NOTE(review): presumably each solver is a script that reads the variables
% prepared above (A, b, Ac, bc, w_init, ...) from the workspace and leaves its
% solution in w -- confirm against the solver scripts.
% NOTE(review): if a solver does not assign w, the value left by the previous
% solver would be copied silently.

% apply the SSG method by constant step-size
% to solve the constrained problem
SSG_deterministic_weakly_convex_minimization
w_SSG_result = w; % keep the SSG result before w is reused

% apply the IPP-SSG method to solve the constrained problem
IPP_SSG_deterministic_weakly_convex_minimization 
w_IPP_SSG_result = w; % keep the IPP-SSG result before w is reused
 
% apply the IPP-ConEx method to solve the constrained problem
IPP_ConEx_deterministic_weakly_convex_minimization 
w_IPP_ConEx_result = w; % keep the IPP-ConEx result

% Part IV: visualize the results

% Name of the dataset.
% NOTE(review): presumably read by the visualization script as the plot title -- confirm
title_name = "COMPAS";
% The visualization step is currently disabled; uncomment the next line to run it.
% deterministic_weakly_convex_visualization