% 
% % MSE plot of 5 by 5
clear all

% File-name fragments, one per subplot row. Entry i pairs with ylabelcell{i}.
f1cell = {'pi_12', 'pi_4', 'pi_2', '3pi_4', '11pi_12'};

% File-name fragments, one per subplot column. Entry j pairs with titlecell{j}.
f2cell = {'10_1', '2_1', '1_1', '05_1', '01_1'};

% LaTeX column titles (relation between the squared norms of x_1 and x_2).
titlecell = { ...
	'$10\|\vec{x}_1\|^2 = \|\vec{x}_2\|^2$', ...
	'$2\|\vec{x}_1\|^2 = \|\vec{x}_2\|^2$', ...
	'$\|\vec{x}_1\|^2 = \|\vec{x}_2\|^2$', ...
	'$\frac{1}{2}\|\vec{x}_1\|^2 = \|\vec{x}_2\|^2$', ...
	'$\frac{1}{10}\|\vec{x}_1\|^2 = \|\vec{x}_2\|^2$'};

% LaTeX row labels (angle theta).
ylabelcell = { ...
	'$\theta = \frac{\pi}{12}$', ...
	'$\theta = \frac{\pi}{4}$', ...
	'$\theta = \frac{\pi}{2}$', ...
	'$\theta = \frac{3\pi}{4}$', ...
	'$\theta = \frac{11\pi}{12}$'};


% Actual Plotting Code
% Draws a 5-by-5 grid of MSE-versus-k curves. Row i uses the file-name
% fragment f1cell{i}, column j uses f2cell{j}; each panel loads
% FH_estimates_<f1>_<f2>.mat and plots the mean squared error of every
% estimator against true_IP for k = 1:100.
%
% For honesty, we should ensure our inner products are within
% the correct bounds by cosine rule for all estimates
% (Note that the code for NR, CVE, Secant already accounts for this)
m1vec = [10 2 1 0.5 0.1];
m2 = 1;
m1m2 = sqrt(m1vec*m2);   % clipping bound for column j is m1m2(j)

TITLEFONTSIZE = 40;
YLABELSIZE = 50;
kvec = 1:100;
for i = 1:5
	for j = 1:5
		index = (i-1)*5+j; % 1:25 (terminated so it is not echoed 25 times)
		sp = subplot(5,5,index); % subplots from 1-25
		sp.Position(1) = sp.Position(1) - 0.05;

		filetoload = ['FH_estimates_', f1cell{i}, '_', f2cell{j}, '.mat'];

		% Load into a struct rather than the workspace: a variable missing
		% from this file then raises an error instead of silently reusing
		% the value left behind by the previously loaded file.
		S = load(filetoload);
		true_IP = S.true_IP;
		bound = m1m2(j);

		% Clip the estimates that are not already bounded by their own code.
		S.est_mat_orig(S.est_mat_orig > bound) = bound;
		S.est_mat_orig(S.est_mat_orig < -bound) = -bound;

		S.CV_values_mat(S.CV_values_mat > bound) = bound;
		S.CV_values_mat(S.CV_values_mat < -bound) = -bound;

		S.est_mat_empirical_CV(S.est_mat_empirical_CV > bound) = bound;
		S.est_mat_empirical_CV(S.est_mat_empirical_CV < -bound) = -bound;

		% semilogy first so the axes are log-scaled; later plot calls
		% inherit that scale because hold is on.
		semilogy(kvec,mean((S.est_mat_orig - true_IP).^2), 'k', 'DisplayName', 'Baseline Estimate');
		hold on

		plot(kvec,mean((S.est_mat_NR - true_IP).^2), 'b-o', 'DisplayName', 'MLE-NR');
		plot(kvec,mean((S.est_mat_sec - true_IP).^2), 'm-+', 'DisplayName', 'MLE-Secant');
		plot(kvec,mean((S.est_mat_CV - true_IP).^2), 'r-*', 'DisplayName', 'CV-EM');
		% CV after using 1 update step is equivalent to using initial
		% estimate of sufficient statistic ; corresponding to the
		% first step after initial estimate
		plot(kvec,mean((squeeze(S.CV_values_mat(:,2,:)) - true_IP).^2), 'r-.', 'DisplayName', 'CV-Initial');
		plot(kvec,mean((S.est_mat_empirical_CV - true_IP).^2), 'b-x', 'DisplayName', 'CV-Empirical');

		set(sp, 'LineWidth', 2);   % axes box/tick line width

		xlim([1,100])

		% Titles at top (first row only)
		if i == 1
			title(titlecell{j}, 'interpreter','latex','FontWeight', 'bold','FontSize', TITLEFONTSIZE, 'FontName', 'Arial Black');
		end

		% Tick labels only on the bottom row and the left column
		if i ~= 5
			set(gca, 'xticklabel', []);
		end
		if j ~= 1
			set(gca, 'yticklabel', []);
		end

		% Single legend, placed in the top-right panel
		if index == 5
			lgd = legend('-DynamicLegend', 'location', 'northeast');
			lgd.FontSize = 28;
			lgd.LineWidth = 2;
		end
		grid on;
		b = gca;
		b.XAxis.FontSize = 25;
		b.YAxis.FontSize = 25;

		% y axis label at sides (set after the ruler font size, as before)
		if j == 1
			ylabel(ylabelcell{i}, 'interpreter','latex', 'FontWeight', 'bold','FontSize', YLABELSIZE, 'FontName', 'Arial Black');
		end
	end
end


% Make lines thicker
% Apply the width to every line object in the current figure with one
% vectorized call. This avoids indexing a hard-coded count of 150 handles,
% which errors when fewer lines exist and leaves any extra lines untouched.
plotLines = findobj(gcf, 'Type', 'Line');
set(plotLines, 'LineWidth', 2);

% Caption along the bottom of the current figure.
annotation('textbox', [0.25, 0.02, 0.8, 0.06], ...
	'String', 'MSE of feature hashing estimates, k = 1:100', ...
	'FontName', 'Arial Black', ...
	'EdgeColor', 'none', ...
	'FontSize', 35);

% Enlarge figure to full screen.
fig = gcf;
fig.Units = 'Normalized';
fig.OuterPosition = [0 0 1 1];
fig.PaperPosition = [0 0 65 30];    % can be bigger than screen

% Save as PNG at 600 dpi and as PDF, then close the figure.
print(fig, 'FH_5by5_new.png', '-dpng', '-r600');
exportgraphics(fig, 'FH_5by5_new.pdf', 'Resolution', 300)
close(fig);

%%%%%%%%%% Make one particular plot %%%%%%%%%%%%%
% Single-panel MSE plot built from FH_estimates_pi_12_1_1.mat.

clear all;
TITLEFONTSIZE = 40;
YLABELSIZE = 70;
load('FH_estimates_pi_12_1_1.mat');

% For honesty, we should ensure our inner products are within
% the correct bounds by cosine rule for all estimates
% (Note that the code for NR, CVE, Secant already accounts for this)
ipBound = 1;

est_mat_orig(est_mat_orig > ipBound) = ipBound;
est_mat_orig(est_mat_orig < -ipBound) = -ipBound;

CV_values_mat(CV_values_mat > ipBound) = ipBound;
CV_values_mat(CV_values_mat < -ipBound) = -ipBound;

est_mat_empirical_CV(est_mat_empirical_CV > ipBound) = ipBound;
est_mat_empirical_CV(est_mat_empirical_CV < -ipBound) = -ipBound;

% Column-wise mean squared error against true_IP.
mseOf = @(estimates) mean((estimates - true_IP).^2);

kvec = 1:100;
semilogy(kvec, mseOf(est_mat_orig), 'k', 'DisplayName', 'Baseline Estimate');
hold all

plot(kvec, mseOf(est_mat_NR), 'b-o', 'DisplayName', 'MLE-NR');
plot(kvec, mseOf(est_mat_sec), 'm-+', 'DisplayName', 'MLE-Secant');
plot(kvec, mseOf(est_mat_CV), 'r-*', 'DisplayName', 'CV-EM');

% CV after using 1 update step is equivalent to using initial
% estimate of sufficient statistic ; corresponding to the
% first step after initial estimate
plot(kvec, mseOf(squeeze(CV_values_mat(:,2,:))), 'r-.', 'DisplayName', 'CV-Initial');
plot(kvec, mseOf(est_mat_empirical_CV), 'b-x', 'DisplayName', 'CV-Empirical');

xlim([1,100])

title('Plot of $\|\vec{x}_1\|^2 = \|\vec{x}_2\|^2$ with $\theta = \frac{\pi}{12}$', ...
	'interpreter', 'latex', ...
	'FontWeight', 'bold', ...
	'FontSize', TITLEFONTSIZE, ...
	'FontName', 'Arial Black');

ylabel('Mean Squared Error', ...
	'FontWeight', 'bold', 'FontSize', YLABELSIZE, 'FontName', 'Arial Black');
xlabel('Number of observations k = 1:100', ...
	'FontWeight', 'bold', 'FontSize', YLABELSIZE, 'FontName', 'Arial Black');

% Legend at top right
lgd = legend('-DynamicLegend', 'location', 'northeast');
lgd.FontSize = 30;
lgd.LineWidth = 2;
grid on;

% Axes font size is applied after the title and labels, as in the
% original statement order.
b = gca;
b.FontSize = 30;

% Enlarge figure to full screen.
set(gcf, 'Units', 'Normalized', 'OuterPosition', [0 0 1 1]);
exportgraphics(gcf, 'Single_MSE_FH.pdf', 'Resolution', 300)
exportgraphics(gcf, 'Single_MSE_FH.png', 'Resolution', 300)
close

% % % Side-by-side boxplots of the estimates at k = 10 (left) and k = 20 (right)
clear all;
load('FH_estimates_pi_12_1_1.mat');

% Set estimates of inner product to be within bounds via cosine rule
% (baseline estimate, CV values and empirical CV only).
est_mat_orig(est_mat_orig > 1) = 1;
est_mat_orig(est_mat_orig < -1) = -1;

CV_values_mat(CV_values_mat > 1) = 1;
CV_values_mat(CV_values_mat < -1) = -1;

est_mat_empirical_CV(est_mat_empirical_CV > 1) = 1;
est_mat_empirical_CV(est_mat_empirical_CV < -1) = -1;

methodLabels = {'FH','MLE-NR', 'MLE-Secant', 'CV-EM', 'CV-Init', 'CV-Emp'};
panelK = [10 20];
panelTitles = {'      Estimates at k = 10', '      Estimates at k = 20'};

for panel = 1:2
	sp = subplot(1,2,panel);
	sp.Position(1) = sp.Position(1) - 0.08;
	sp.Position(3) = sp.Position(3) + 0.05;
	k = panelK(panel);

	% One column per estimator, all taken at observation count k.
	estimatesAtK = [ ...
		est_mat_orig(:,k), ...
		est_mat_NR(:,k), ...
		est_mat_sec(:,k), ...
		est_mat_CV(:,k), ...
		squeeze(CV_values_mat(:,2,k)), ...
		est_mat_empirical_CV(:,k)];
	boxplot(estimatesAtK, 'Labels', methodLabels)
	grid on;
	b = gca;
	b.XAxis.FontSize = 30;
	b.YAxis.FontSize = 30;

	title(panelTitles{panel}, 'FontSize', 40, 'FontWeight', 'bold','FontName', 'Arial Black')
	if panel == 1
		% Only the left panel carries the y-axis label.
		ylabel('        Inner Product Estimates', 'FontWeight', 'bold', 'FontSize', 40,'FontName', 'Arial Black');
	end

	% Dotted reference line at true_IP.
	line(xlim, [true_IP true_IP], 'Color', 'blue', 'LineWidth', 2,'LineStyle', ':');
end

% Enlarge figure to full screen.
set(gcf, 'Units', 'Normalized', 'OuterPosition', [0 0 1 1]);
exportgraphics(gcf,'side_by_sidebp_FH.pdf','Resolution',300)
exportgraphics(gcf,'side_by_sidebp_FH.png','Resolution',300)
close;

% % %
% % % Data for table in Appendix
% For k = 10 and then k = 20, echo k followed by find_outlier_ratio of each
% estimator column, in the order MLE-NR, MLE-Secant, CV-EM, CV-Init, CV-Emp.
% The statements are left unterminated so the values print to the console
% (presumably to be copied into the table).
for k = [10 20]
	k
	find_outlier_ratio(est_mat_NR(:,k))
	find_outlier_ratio(est_mat_sec(:,k))
	find_outlier_ratio(est_mat_CV(:,k))
	find_outlier_ratio(squeeze(CV_values_mat(:,2,k)))
	find_outlier_ratio(est_mat_empirical_CV(:,k))
end

% This shows # cubic roots: the number of entries equal to 1 in column k
% of cubic_roots_mat.
for k = [10 20]
	k
	nnz(cubic_roots_mat(:,k) == 1)
end


% % % Boxplots of the number of update steps at k = 20, 40, 60, 80, 100
clear all;
load('FH_estimates_pi_12_1_1.mat');

figure;
hold on;
grid on;

% One group of three boxes per k; method m sits at offset m inside group j.
% Boxes are created method by method (NR, then Secant, then CV).
iterCounts = {NR_iter_mat, sec_iter_mat, CV_iter_mat};
boxColors = 'kgb';
for m = 1:3
	for j = 1:5
		index = (j-1)*5 + m;
		boxplot(iterCounts{m}(:,j*20), 'Positions', index, 'Colors', boxColors(m));
	end
end

% Tick each group at its middle box.
set(gca, 'XTick', [2 7 12 17 22], ...
	'XTickLabel', {'k=20', 'k=40', 'k=60', 'k=80', 'k=100'});

xlabel('      Number of observations k', 'FontWeight', 'bold', 'FontSize', 40,'FontName', 'Arial Black');
ylabel('            Update steps till convergence', 'FontWeight', 'bold', 'FontSize', 40,'FontName', 'Arial Black');
b = gca;
b.XAxis.FontSize = 35;
b.YAxis.FontSize = 35;
title('                 Boxplots for update steps till convergence at respective k', 'FontSize', 40, 'FontWeight', 'bold','FontName', 'Arial Black');

% One box handle per colour, used as the legend entries.
h1 = findobj(gca,'Tag','Box','Color','k');
h2 = findobj(gca,'Tag','Box','Color','g');
h3 = findobj(gca,'Tag','Box','Color','b');

% Thicken the main parts of every box.
thickTags = {'Median', 'Box', 'Upper Whisker', 'Lower Whisker'};
for t = 1:numel(thickTags)
	set(findobj(gca, 'Type', 'line', 'Tag', thickTags{t}), 'LineWidth', 2);
end

lgd = legend([h1(1), h2(1), h3(1)], {'MLE-NR', 'MLE-Secant', 'CV-EM'}, 'Location', 'northeast');

lgd.FontSize = 30;
lgd.LineWidth = 2;
% Enlarge figure to full screen.
set(gcf, 'Units', 'Normalized', 'OuterPosition', [0 0 1 1]);
exportgraphics(gcf,'boxplot_update_steps_FH.pdf','Resolution',300)
exportgraphics(gcf,'boxplot_update_steps_FH.png','Resolution',300)
close;

% % Consider convergence rate
clear all;

% File-name fragments: first part (angle) and second part (norm setting).
f1cell = {'pi_12', 'pi_4', 'pi_2', '3pi_4', '11pi_12'};
f2cell = {'10_1', '2_1', '1_1', '05_1', '01_1'};

iter=10000;
k = 100;
% Look at the "best optimal case" for all 3 algorithms to determine convergence rate, and in the case where there is only one real root
%
% For every run (row `its`) of every file, the absolute errors e_n between
% the stored iterates at k and real_roots_mat(its,k) are computed, NaN
% entries are dropped, and a straight line is fitted to
% log(e_{n+1}) against log(e_n). Row 1 of each *_slopes array holds the
% fitted slope (the empirical order "alpha"), row 2 the intercept
% (log of the empirical constant "C").
nFiles = 5*5;

% Preallocate the pooled results instead of growing them file by file.
NR_slopes_global = zeros(2, nFiles*iter);
sec_slopes_global = zeros(2, nFiles*iter);
CV_slopes_global = zeros(2, nFiles*iter);

% Shared fitting helper: one polyfit per method per run. The original code
% fitted NR and CV twice each and kept an unused copy in `ls`.
dropNaN = @(v) v(~isnan(v));
fitLogLog = @(e) polyfit(log(e(1:end-1)), log(e(2:end)), 1);
rateFit = @(vals, root) fitLogLog(dropNaN(abs(vals - root)));

% Only these variables are read, so only these are loaded from each file.
neededVars = {'NR_values_mat', 'sec_values_mat', 'CV_values_mat', 'real_roots_mat'};

for i = 1:5
	for j = 1:5
		filetoload = ['FH_estimates_', f1cell{i}, '_', f2cell{j}, '.mat'];
		% Loading into a struct means a variable missing from this file
		% raises an error instead of reusing the previous file's value.
		S = load(filetoload, neededVars{:});

		NR_slopes = zeros(2,iter);
		sec_slopes = zeros(2,iter);
		CV_slopes = zeros(2,iter);

		fprintf('Fitting convergence rates for file (%d, %d)\n', i, j);
		for its = 1:iter
			root = S.real_roots_mat(its,k);

			NR_slopes(:,its) = rateFit(S.NR_values_mat(its,:,k), root);
			% The secant iterates are taken from the second entry onwards.
			sec_slopes(:,its) = rateFit(S.sec_values_mat(its,2:end,k), root);
			CV_slopes(:,its) = rateFit(S.CV_values_mat(its,:,k), root);
		end

		% Same column order as appending file after file.
		cols = ((i-1)*5 + (j-1))*iter + (1:iter);
		NR_slopes_global(:,cols) = NR_slopes;
		sec_slopes_global(:,cols) = sec_slopes;
		CV_slopes_global(:,cols) = CV_slopes;
	end
end

% Discard runs whose fitted intercept is NaN.
NR_slopes_global = NR_slopes_global(:, ~isnan(NR_slopes_global(2,:)));
sec_slopes_global = sec_slopes_global(:, ~isnan(sec_slopes_global(2,:)));
CV_slopes_global = CV_slopes_global(:, ~isnan(CV_slopes_global(2,:)));


% Side by side boxplots: fitted slopes (left) and exp(intercepts) (right)
methodNames = {'MLE-NR', 'MLE-Secant', 'CV-EM'};
boxColors = 'kgb';
boxPositions = [0 1 2];

% ---- Left panel: row 1 of each slopes array ----
alphaSets = {NR_slopes_global(1,:), sec_slopes_global(1,:), CV_slopes_global(1,:)};

sp = subplot(1,2,1);
sp.Position(1) = sp.Position(1) - 0.08;
sp.Position(3) = sp.Position(3) + 0.05;

hold on;
for m = 1:3
	boxplot(alphaSets{m}, 'Positions', boxPositions(m), 'Colors', boxColors(m));
end
set(gca, 'XTick', boxPositions, 'XTickLabel', methodNames);

grid on;
b = gca;
b.XAxis.FontSize = 35;
b.YAxis.FontSize = 35;

title('            Plot of empirical alphas for each run', 'FontSize', 40, 'FontWeight', 'bold','FontName', 'Arial Black')

% Dotted reference lines at 1 and 2.
for level = [1 2]
	line(xlim, [level level], 'Color', 'blue', 'LineWidth', 1,'LineStyle', ':');
end

% ---- Right panel: exp of row 2 of each slopes array ----
constSets = {exp(NR_slopes_global(2,:)), exp(sec_slopes_global(2,:)), exp(CV_slopes_global(2,:))};

sp = subplot(1,2,2);
sp.Position(1) = sp.Position(1) - 0.08;
sp.Position(3) = sp.Position(3) + 0.05;

hold on;
for m = 1:3
	boxplot(constSets{m}, 'Positions', boxPositions(m), 'Colors', boxColors(m));
end
set(gca, 'XTick', boxPositions, 'XTickLabel', methodNames);

grid on;
b = gca;
b.XAxis.FontSize = 35;
b.YAxis.FontSize = 35;

title('           Plot of empirical Cs for each run', 'FontSize', 40, 'FontWeight', 'bold','FontName', 'Arial Black')

% Enlarge figure to full screen.
set(gcf, 'Units', 'Normalized', 'OuterPosition', [0 0 1 1]);
exportgraphics(gcf,'boxplot_convergence_FH.pdf','Resolution',300)
exportgraphics(gcf,'boxplot_convergence_FH.png','Resolution',300)
close;
