%% Simulation: V_α Bound vs Mutual Information Bound (Z-Channel, BSC, BEC)
% This script evaluates and visualizes how the proposed V_α-based generalization
% bound compares to the classical mutual information (MI) bound on a Z-channel, 
% Binary Symmetric Channel (BSC) and Binary Erasure Channel (BEC).

%% Figure 1.
% This section evaluates and visualizes how the proposed V_α-based generalization
% bound compares to the classical mutual information (MI) bound on a Z-channel, 
% highlighting the trade-off between the two terms in our bound formulation.

% Start from a clean state: close figures, wipe the workspace, clear the console.
close all; clear all; clc;

% Z-channel setup
q = 0.8;   % P(X=1) of the Bernoulli input
p = 0.3;   % probability that a transmitted 1 is received as 0

% Grid of α values over which both terms of the bound are evaluated
alpha = linspace(1.01, 20, 1000);

% Correlation measure V_α(S;W) on the α grid
V = V_alpha(alpha, p, q);

% Norm term: the larger of the two hypotheses w = 0 and w = 1
supL = max(Lnorm(alpha, 0, q), Lnorm(alpha, 1, q));

% Proposed bound is the element-wise product of the two terms
our_bound = supL .* V;

% Reference curves (independent of α, replicated so they plot as flat lines)
sigma2 = 1/4;
gen_hat = repmat(2 * q * (1 - p) * (1 - q), size(alpha));
raginsky_bound = repmat(sqrt(2 * sigma2 * MI_Zchannel(p, q)), size(alpha));

% --- Plot: the two factors, their product, and the two reference levels ---
figure; hold on;
plot(alpha, supL, ':', 'Color', [0.6, 0.1, 0.15], 'LineWidth', 2, ...
    'DisplayName', '$\sup_{w\in \mathcal{W}} || L_S(w) - L_{\mu}(w) ||_{\alpha''}$');
plot(alpha, V, 'Color', 'm', 'LineStyle', '--', 'LineWidth', 2, ...
    'DisplayName', '$V_{\alpha}(S;W)$');
plot(alpha, our_bound, 'Color', 'b', 'LineStyle', '-.', 'LineWidth', 2, ...
    'DisplayName', '$\sup_{w} \|L_S(w) - L_{\mu}(w)\|_{\alpha''} \cdot V_\alpha(S;W)$');
plot(alpha, gen_hat, 'Color', 'k', 'LineStyle', '-', 'LineWidth', 2, ...
    'DisplayName', '$\overline{gen}$');
plot(alpha, raginsky_bound, 'Color', 'r', 'LineStyle', '-', 'Marker', 'o', ...
    'LineWidth', 2, 'MarkerSize', 2, ...
    'DisplayName', '$\sqrt{\frac{2\sigma^2}{n} I(W;S)}$');

% Axes cosmetics
ax = gca;
set(ax, 'Box', 'on', 'FontSize', 14);
xlabel('$\alpha$', 'Interpreter', 'latex', 'FontSize', 16);
ylabel('Function values', 'Interpreter', 'latex', 'FontSize', 16);

% Legend is placed manually, in normalized figure coordinates
lgd = legend('Interpreter', 'latex', 'FontSize', 14);
set(lgd, 'Units', 'normalized', 'Position', [0.7 0.5 0.1 0.1]);
shg;   % bring the figure window to the front

%% Check Tightness at α = 2
% This section computes and prints the numerical values of:
%   - The proposed V_α-based bound
%   - The empirical generalization gap estimate (gen_hat)
%   - The classical mutual information bound (Raginsky)
% to assess the relative tightness of the proposed bound when α = 2.
% Reset the workspace and console so this section can be run on its own.
clear all; clc;

% Z-channel setup and fixed exponent
q = 0.8;        % P(X=1) of the Bernoulli input
p = 0.3;        % probability that a transmitted 1 is received as 0
alpha = 2;      % exponent at which the bound is evaluated
sigma2 = 1/4;   % variance proxy used in the MI bound

% Correlation measure V_α(S;W)
V = V_alpha(alpha, p, q);

% Norm term sup_w || L_S(w) - L_μ(w) ||_α', taken over w in {0, 1}
supL = max(Lnorm(alpha, 0, q), Lnorm(alpha, 1, q));

% Proposed bound: product of the norm term and V_α
our_bound = supL .* V;

% Closed-form generalization gap used as the reference value
gen_hat = 2 * q * (1 - p) * (1 - q);

% Mutual-information bound sqrt(2 σ² I(S;W))
raginsky_bound = sqrt(2 * sigma2 * MI_Zchannel(p, q));

% Report the three numbers side by side
fprintf('%s\n', '--- Tightness Check at α = 2 ---');
fprintf('%s\n', ['Our Bound           : ', num2str(our_bound)]);
fprintf('%s\n', ['Empirical Gen (hat) : ', num2str(gen_hat)]);
fprintf('%s\n', ['Raginsky Bound      : ', num2str(raginsky_bound)]);

%% Figure 3: BSC Channel — Special Case α = 2
% This section compares the proposed V_α-based bound with the classical mutual information
% bound for a Binary Symmetric Channel (BSC) under uniform input distribution and α = 2.

% Reset the workspace and console so this section can be run on its own.
clear all; clc;

% Sweep of the BSC flip probability over [0, 1]
p = 0:0.001:1;

% Mutual information of the BSC: I = 1 - H_b(p)
Hb_p = -(p .* log2(p) + (1 - p) .* log2(1 - p));   % binary entropy in bits
Hb_p(isnan(Hb_p)) = 0;                             % 0*log2(0) yields NaN; use the convention 0
MI = 1 - Hb_p;
% NOTE(review): MI is computed in bits (log2) - confirm this is the unit the
% mutual-information bound is meant to be evaluated in.
MI_Raginsky = sqrt(2 * MI);

% V_α at α = 2 reduces to |1 - 2p| here
v_alpha = abs(2 .* p - 1);

% Draw both curves on one set of axes
figure;
plot(p, v_alpha, 'Color', 'b', 'LineStyle', '-', 'LineWidth', 2, ...
    'DisplayName', '$V_{\alpha}(S;W)$');
hold on;
plot(p, MI_Raginsky, 'Color', 'r', 'LineStyle', '--', 'LineWidth', 2, ...
    'DisplayName', '$\sqrt{2I(S:W)}$');

% Axes cosmetics, legend and labels
ax = gca;
set(ax, 'Box', 'on', 'FontSize', 14);
legend('Interpreter', 'latex', 'FontSize', 14);
xlabel('$p$', 'Interpreter', 'latex', 'FontSize', 16);
ylabel('Function values', 'Interpreter', 'latex', 'FontSize', 16);


%% Figure 4: BEC Channel — Special Case α = 2
% This section compares V_α and MI bounds for the Binary Erasure Channel (BEC)
% under uniform input and α = 2.

% Reset the workspace and console so this section can be run on its own.
clear all; clc;

alpha = 2;               % exponent at which V_α is evaluated
k = 1;                   % exponent defining the alphabet size below
abs_A = 2^k;             % alphabet size (2 for k = 1)
epsilon = 0:0.001:1;     % sweep of the erasure probability

% V_α for the BEC; every factor except (1 - epsilon) is a scalar constant
v_alpha = (1 - epsilon) .* (1 - 1 / abs_A) .* ...
    (abs_A.^(alpha - 1) + (1 - 1 / abs_A).^(1 - alpha)).^(1 ./ alpha);

% Mutual-information curve: I = (1 - epsilon) * log2|A|, then sqrt(2 I)
MI = (1 - epsilon) .* log2(abs_A);
MI_Raginsky = sqrt(2 * MI);

% Draw both curves on one set of axes
figure;
plot(epsilon, v_alpha, 'Color', 'b', 'LineStyle', '-', 'LineWidth', 2, ...
    'DisplayName', '$V_{\alpha}(S;W)$');
hold on;
plot(epsilon, MI_Raginsky, 'Color', 'r', 'LineStyle', '--', 'LineWidth', 2, ...
    'DisplayName', '$\sqrt{2I(S:W)}$');

% Axes cosmetics, legend and labels
ax = gca;
set(ax, 'Box', 'on', 'FontSize', 14);
legend('Interpreter', 'latex', 'FontSize', 14);
xlabel('$\epsilon$', 'Interpreter', 'latex', 'FontSize', 16);
ylabel('Function values', 'Interpreter', 'latex', 'FontSize', 16);


%% Figure 5: Z Channel — Special Case α = 2
% V_α Bound vs Mutual Information Bound for Special Case α=2 (Z-Channel)

% Constants
n = 1;              % Sample size (1 in this experiment)
sigma2 = 1/4;       % Variance proxy used in the MI bound
q = 0.5;            % Input distribution: P(X=1)
p = 0:0.001:1;      % Z-channel crossover probability (1 -> 0), swept over [0, 1]

% Mutual-information bound sqrt(2 σ² I(S;W) / n), evaluated for every p
raginsky_bound = sqrt(2*sigma2.*MI_Zchannel(p, q)./n);

% Proposed bound at alpha = 2
alpha = 2;
V = V_alpha(alpha, p, q);   % Correlation measure V_α(S;W) for every p

% Norm term: the larger of the two hypotheses w = 0 and w = 1 (does not depend on p)
supL = max(Lnorm(alpha, 0, q), Lnorm(alpha, 1, q));

% Product of the two terms
our_bound = V .* supL;

% Plot both bounds against p
figure

% Proposed bound (solid blue). The label must be delimited by a matching pair
% of single '$'; mixing '$$' and '$' makes the LaTeX interpreter reject it.
plot(p, our_bound, 'b-', 'LineWidth', 2, 'DisplayName', '$\sup_{w\in \mathcal{W}} || L_S(w) - L_{\mu}(w) ||_{\alpha''} \cdot V_{\alpha}(S;W)$');

% Mutual-information bound (dashed red)
hold on
plot(p, raginsky_bound, 'r--', 'LineWidth', 2, 'DisplayName', '$\sqrt{\frac{2\sigma^2}{n}I(S:W)}$');

% Axes cosmetics
ax = gca;
ax.Box = 'on';
ax.FontSize = 14;

% Legend
legend('Interpreter', 'latex', 'FontSize', 14);

% Axis labels (p is typeset in math mode, as in the BSC figure)
xlabel('$p$', 'Interpreter', 'latex', 'FontSize', 16);
ylabel('Function values', 'Interpreter', 'latex', 'FontSize', 16);


%% Functions
% This section defines all helper functions used throughout the simulations:
%
% - L_mu(w, q): Computes the expected loss over the marginal distribution μ for a given w.
function L = L_mu(w, q)
% L_MU  Expected squared loss of hypothesis w under a Bernoulli(q) sample.
%   L = L_mu(w, q) weights the squared distance from w to 0 by (1-q) and the
%   squared distance from w to 1 by q. Inputs are combined element-wise.
    lossAtZero = w.^2;        % squared loss when the sample equals 0
    lossAtOne  = (w-1).^2;    % squared loss when the sample equals 1
    L = (1-q).*lossAtZero + q.*lossAtOne;
end
% - L_s(w, z): Computes the empirical loss for sample s = z and hypothesis w.
function L = L_s(w, z)
% L_S  Squared loss of hypothesis w on the single sample z.
%   L = L_s(w, z) returns (w - z).^2, evaluated element-wise.
    residual = w - z;
    L = residual.^2;
end
% - Lnorm(alpha, w, q): Computes the Hölder L^p-norm term in our bound, based on the data distribution Bernoulli(q).
function L = Lnorm(alpha, w, q)
% LNORM  L^{alpha'}-norm of L_S(w) - L_mu(w) under a Bernoulli(q) sample.
%   L = Lnorm(alpha, w, q) uses the conjugate exponent
%   alpha' = 1/(1 - 1/alpha) and returns
%       ( q*|L_mu(w,q) - L_s(w,1)|^alpha' + (1-q)*|L_mu(w,q) - L_s(w,0)|^alpha' )^(1/alpha').
%   Inputs are combined element-wise (scalars and compatible arrays).
%
%   For alpha == 1 the conjugate exponent is Inf. The expression above then
%   evaluates to 0^0 = 1 (or NaN) instead of the limiting value, so the
%   essential supremum of the deviation is returned for those entries:
%   the largest deviation over the sample values that have non-zero probability.
    alphaprime = 1./(1-1./alpha);

    expectedLoss = L_mu(w, q);
    devOne  = abs(expectedLoss - L_s(w, 1));   % deviation when the sample is 1 (weight q)
    devZero = abs(expectedLoss - L_s(w, 0));   % deviation when the sample is 0 (weight 1-q)

    L = (q.*(devOne.^alphaprime)+(1-q).*(devZero.^alphaprime)).^(1./alphaprime);

    supCase = (alphaprime == Inf);
    if any(supCase(:))
        % Drop a sample value from the supremum when its probability is zero.
        essSup = max(devOne.*(q > 0), devZero.*(q < 1));
        % Expand mask and values to the broadcast size of the result.
        supCase = supCase & true(size(L));
        essSup = essSup + zeros(size(L));
        L(supCase) = essSup(supCase);
    end
end
% - V_alpha(alpha, p, q): Computes the correlation measure V_α(S;W) for the Z-channel.
function v = V_alpha(alpha, p, q)
% V_ALPHA  Correlation measure V_alpha(S;W) for the Z-channel.
%   v = V_alpha(alpha, p, q) sums one L^alpha-type term per output symbol.
%   Both output symbols use the same expression, so the term is evaluated
%   once and doubled. Inputs are combined element-wise.
    devWeightedByOneMinusQ = abs(q - p.*q);           % enters with weight (1-q)
    devWeightedByQ         = abs(p + q - 1 - p.*q);   % enters with weight q
    perSymbol = ((1-q).*(devWeightedByOneMinusQ.^alpha) + q.*(devWeightedByQ.^alpha)).^(1./alpha);
    v = 2.*perSymbol;
end
% - Hb(p): Binary entropy function.
function H = Hb(p)
% HB  Binary entropy function, in bits.
%   H = Hb(p) returns -p.*log2(p) - (1-p).*log2(1-p), evaluated element-wise.
%   At p = 0 and p = 1 the convention 0*log2(0) = 0 is applied, so H is 0
%   there rather than the NaN the raw formula produces. NaN inputs stay NaN.
    H = -p.*log2(p)-(1-p).*log2(1-p);
    % 0*log2(0) is 0*(-Inf) = NaN in floating point; the entropy at the
    % endpoints is 0.
    H(p == 0 | p == 1) = 0;
end
% - MI_Zchannel(p, q): Mutual information I(S;W) under a Z-channel with general input distribution Bernoulli(q).
function M = MI_Zchannel(p, q)
% MI_ZCHANNEL  Mutual information of a Z-channel with Bernoulli(q) input.
%   M = MI_Zchannel(p, q) returns Hb((1-p).*q) - q.*Hb(p), where p is the
%   1 -> 0 crossover probability. Units follow Hb. Inputs are combined
%   element-wise.
    probOutputOne = (1-p).*q;          % argument of the first entropy term
    outputEntropy = Hb(probOutputOne);
    conditionalEntropy = q.*Hb(p);     % entropy term weighted by q
    M = outputEntropy - conditionalEntropy;
end
