% Experiment setup: problem sizes, algorithm constants, synthetic data,
% ground-truth weights and the perturbed initial model.

% parameters

d = 50; % dimension of the data

K = 10; % number of neurons in the hidden layer

N = 300; % number of samples in labeled data

M_1 = 500; % number of samples in unlabeled data (1)

M_2 = 5000; % number of samples in unlabeled data (2)

eps_1 = 0.5; % relative error of the initial point

% value of lambda: weight of the labeled-data gradient in the update
% (the unlabeled-data gradient is weighted by 1 - lambda)
lambda = min( sqrt(N/(2*K*d)) , 1 );

delta_t = 1^2; % variance of the unlabeled data

eta = 1; % gradient step size

beta = 0.2; % heavy-ball (momentum) coefficient

x = randn( d , N ); % input of labeled data

% randn draws unit-variance samples, so a variance of delta_t is obtained by
% scaling with the standard deviation sqrt(delta_t), not with delta_t itself.
sigma_t = sqrt( delta_t );

x_s1 = sigma_t * randn( d , M_1 ); % unlabeled data (1)

x_s2 = sigma_t * randn( d , M_2 ); % unlabeled data (2)

W = 5 * ( rand( d , K ) - 0.5 ); % ground-truth weights, uniform in [-2.5, 2.5]

y = y_tl(x, W); % output of labeled data

T = 10; % number of iterations in the inner loop

L = 1000; % number of iterations in the outer loop

% record matrix of the relative error:
% one row per gradient step, column 1 for the x_s1 run, column 2 for the x_s2 run.
% Rows that are never reached (early exit of an inner loop) stay NaN.

err = nan( L * T , 2 );

% generate initial model: perturb W by a matrix whose Frobenius norm is
% eps_1 * norm(W, 'fro'), so the relative error of W_0 equals eps_1

temp = rand( d , K );

W_0 = W + eps_1 * temp / norm(temp, 'fro') * norm(W , 'fro');

% iterative self-training algorithm with labeled dataset x and unlabeled dataset x_s1
%
% Outer loop: regenerate the pseudo labels of x_s1 from the current model.
% Inner loop: heavy-ball gradient steps on the lambda-weighted combination of
% the labeled and the pseudo-labeled gradients.

W_t = W_0;

% Previous iterate used by the momentum term. It starts equal to W_t, so the
% very first step carries no momentum. It is kept across outer iterations.
W_pre = W_0;

loop_number =  0;

fprintf('Outer loop number =     , Inner Loop number =     ');

for l = 1 : L
    
    y_s1 = y_tl( x_s1 , W_t ); % pseudo labels from the current model
    
    for t = 1 : T
        
        % the backspaces erase the previously printed counters so the
        % progress line is rewritten in place
        fprintf('\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b%5d, Inner Loop number =%4d\n',l, t);
        
        G = lambda * Gradient_tl( y, x, W_t) + (1-lambda) * Gradient_tl( y_s1, x_s1, W_t);
        
        % Heavy-ball update. The new iterate is built from the current and
        % the previous iterate BEFORE W_pre is overwritten; assigning
        % W_pre = W_t first would make the momentum term identically zero.
        W_next = W_t - eta * G + beta * (W_t - W_pre);
        
        W_pre = W_t;
        
        W_t = W_next;
        
        loop_number = loop_number + 1;

        err( loop_number,1 ) = norm( W_t - W,  'fro' )/ norm( W , 'fro');
        
        % leave the inner loop once the relative change of the iterate is negligible
        if norm(W_pre - W_t, 'fro')/norm(W_pre, 'fro') < 10^(-4)
            
            break;
            
        end
        
    end
    
end


% iterative self-training algorithm with labeled dataset x and unlabeled dataset x_s2
%
% Outer loop: regenerate the pseudo labels of x_s2 from the current model.
% Inner loop: heavy-ball gradient steps on the lambda-weighted combination of
% the labeled and the pseudo-labeled gradients.

W_t = W_0;

% Previous iterate used by the momentum term. It starts equal to W_t, so the
% very first step carries no momentum. It is kept across outer iterations.
W_pre = W_0;

loop_number = 0;

fprintf('Outer loop number =     , Inner Loop number =     ');

for l = 1 : L
    
    y_s2 = y_tl( x_s2 , W_t ); % pseudo labels from the current model
    
    for t = 1 : T
        
        % the backspaces erase the previously printed counters so the
        % progress line is rewritten in place
        fprintf('\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b%5d, Inner Loop number =%4d\n',l, t);
        
        G = lambda * Gradient_tl( y, x, W_t) + (1-lambda) * Gradient_tl( y_s2, x_s2, W_t);
        
        % Heavy-ball update. The new iterate is built from the current and
        % the previous iterate BEFORE W_pre is overwritten; assigning
        % W_pre = W_t first would make the momentum term identically zero.
        W_next = W_t - eta * G + beta * (W_t - W_pre);
        
        W_pre = W_t;
        
        W_t = W_next;
        
        loop_number = loop_number + 1;
        
        err( loop_number, 2 ) = norm( W_t - W,  'fro' )/ norm( W , 'fro');
        
        % leave the inner loop once the relative change of the iterate is negligible
        if norm(W_pre - W_t, 'fro')/norm(W_pre, 'fro') < 10^(-4)
            
            break;
            
        end
        
    end
    
end

% Plot the recorded relative error of both runs on a logarithmic y-axis.
% Each column of err becomes one curve; the legend entries name the
% unlabeled sample size used for that column.

label_run_1 = ['M = ', num2str(M_1)];

label_run_2 = ['M = ', num2str(M_2)];

semilogy( err , 'linewidth', 2);

grid('on');

axis('on');

xlabel('Number of iterations');

ylabel('Relative error');

legend(label_run_1, label_run_2)

% applied last so the font settings cover the labels created above
set(gca,'fontsize',16,'fontname', 'Times New Roman');




% function to calculate the gradient used in each descent step

function [G] = Gradient_tl( y, x, W)
% GRADIENT_TL  Gradient-like update direction for the one-hidden-layer ReLU model.
%
%   G = Gradient_tl(y, x, W) evaluates the model output y_tl(x, W), forms the
%   residual against the targets y, and for every column j of W averages
%   residual * (W(:,j)' * x > 0) * x over the samples (the columns of x).
%
%   Inputs
%     y : row of target values, one per column of x
%     x : input samples stored as columns
%     W : weight matrix, one column per hidden neuron
%
%   Output
%     G : matrix of the same row/column counts as W
%
%   Note: y_tl averages the neurons with a 1/K factor, but that factor is not
%   applied here, so G is K times the gradient of the mean half-squared
%   residual with respect to W.

[n_rows, n_cols] = size( W );

% The residual does not depend on the neuron index, so compute it once.
residual = y_tl( x , W ) - y;

G = zeros( n_rows , n_cols );

for col = 1 : n_cols
    
    % indicator of the samples on which neuron `col` is active
    is_active = W(:, col)' * x > 0;
    
    % scale every sample by its masked residual, then average over samples
    G(:, col) = mean( bsxfun( @times, residual .* is_active, x ), 2 );
    
end

end

% function to calculate the (pseudo) label

function [ y ] = y_tl( x , w )
% Y_TL  Output of the one-hidden-layer ReLU model, used as (pseudo) label.
%
%   y = y_tl(x, w) returns a 1-by-N row whose n-th entry is the average over
%   the K columns of w of max(w(:,k)' * x(:,n), 0).
%
%   Inputs
%     x : input samples stored as columns (N columns)
%     w : weight matrix, one column per hidden neuron (K columns)

[ ~ , n_samples ] = size( x );

[ ~ , n_neurons ] = size( w );

neuron_weight = 1/n_neurons; % every neuron contributes equally

y = zeros( 1, n_samples );

% iterating over a matrix visits its columns one at a time
for w_col = w
    
    y = y + neuron_weight * max( w_col' * x, 0 );
    
end

end