%clear all;
% Experiment script: sweep the mean shift mu of a two-component Gaussian
% mixture. For every mu, inputs are drawn from the mixture, binary labels
% are sampled from the output of a fixed ground-truth weight matrix W,
% gradient descent is run from a perturbed copy of W, and two quantities
% are recorded per (i, j):
%   sample_s(16-i, j) - fraction of trials whose runs agree with each other
%   err_s(16-i, j)    - loss on held-out data from the last trial
%
% Requires Conv_sigmoid_FCN, Gradient_crossentropy and cross_entropy_loss,
% which are defined outside this file, and binornd (Statistics and Machine
% Learning Toolbox).

% Parameter
L=2;                        % not referenced anywhere in this script
sample_s=zeros(15,20);      % success rate, indexed (16-i, j)
err_s=nan(size(sample_s));  % held-out loss, same indexing; NaN = not computed
delta_=50000;               % sample-size increment: N0 = i*delta_
runs=1;                     % gradient-descent restarts per trial
d=5;                        % input dimension (rows of W and of the data)
test_max=1;                 % independent trials per (i, j)
K=3;                        % number of nodes in hidden layer (columns of W)
W = 1 * randn( d, K );      % ground-truth weights, shared by the whole sweep

for j=1:11
    mu=(j-1)/2;             % mean-shift parameter: 0, 0.5, ..., 5
    for i=1:1
        N_test=1000;
        N0 = i*delta_; % sampling number
        %K = 3;    % number of nodes in hidden layer

        % 80/20 split of the N0 training samples between the two mixture
        % components. Rounding keeps both sizes integer-valued (randn
        % rejects non-integer sizes) and makes them sum to N0 exactly.
        n_comp1 = round(0.8*N0);
        n_comp2 = N0 - n_comp1;

        loop_n = 2000;      % gradient-descent iterations per run

        eta = 0.1/(1+mu*mu); % stepsize of gradient descent (shrinks as mu grows)

        count=0;            % trials that met the consistency criterion
        for test=1:test_max
            %  mix-gaussian parameter

            % Component 1: unit-variance Gaussian shifted by -1 per coordinate.
            % Component 2: unit-variance Gaussian shifted by mu/sqrt(d).
            x_1=1*randn(d, n_comp1)-1;
            x_2=randn(d, n_comp2)+mu/sqrt(d);
            x_N=cat(2,x_1,x_2);
            r=randperm(size(x_N,2));   % shuffle so the components are interleaved
            x_N=x_N(:,r);


            %{
            x_1_t=1*randn(d, N_test*0.8)-1;
            x_2_t=randn(d, N_test*0.2)+mu/sqrt(d);
            x_test=cat(2,x_1_t,x_2_t);
            rt=randperm(size(x_test,2));
            x_test=x_test(:,rt);
            %}

            % Held-out inputs are drawn from the second component only
            % (the mixture version is kept in the block comment above).
            x_2_t=randn(d, N_test)+mu/sqrt(d);
            x_test=x_2_t;
            rt=randperm(size(x_test,2));
            x_test=x_test(:,rt);

            % Generate W

            %W = 1 * randn( d, K );

            % Algorithm

            % The second output of Conv_sigmoid_FCN is used below as the
            % per-sample success probability of a Bernoulli label.
            [H_matrix, H_FCN] = Conv_sigmoid_FCN( x_N, W );
            y_N=zeros(1,N0);

            for ind=1:N0
                y_N(ind)=binornd(1,H_FCN(ind));
            end

            % Same labelling procedure for the held-out inputs.
            [H_matrix_t, H_FCN_t] = Conv_sigmoid_FCN( x_test, W );
            y_N_t=zeros(1,N_test);

            for ind=1:N_test
                y_N_t(ind)=binornd(1,H_FCN_t(ind));
            end


            W_out=zeros(d*K,runs);     % final weights of each run, one column per run
            for run=1:runs
                % Start at relative Frobenius distance 0.1 from W, in a
                % random direction.
                temp = randn( d , K );
                W_0 = W + 0.1 * norm( W , 'fro' ) * temp / norm( temp ,  'fro' );
                W_t0=W_0;
                %eta=20/(1+mu^2);
                % Relative error of the iterate w.r.t. W at every step; kept
                % in the workspace for inspection, not used further below.
                errit=zeros(loop_n,1);
                for l = 1 : loop_n

                    % Presumably the cross-entropy gradient at W_t0 on
                    % (x_N, y_N) - confirm against Gradient_crossentropy.
                    GD=Gradient_crossentropy(x_N,y_N,W_t0);
                    W_t = W_t0 - eta * GD;
                    errit( l ) = norm( W-W_t , 'fro') / norm( W , 'fro');
                    %if isnan(errit(l))
                    %    break;
                    %end

                    W_t0=W_t;
                end
                W_out(:,run)=W_t(:);
            end
            % Consistency criterion: RMS distance of the runs' final weights
            % from their mean. NOTE: with runs == 1 this is exactly 0 for any
            % finite result, so every such trial is counted as a success.
            w_bar=mean(W_out,2);
            W_bar=repmat(w_bar,1,runs);   % explicit replication; no implicit expansion needed
            Error=norm(W_bar-W_out,'fro')/sqrt(runs);
            if Error<=1e-3
               count=count+1;
            end
        end
        sample_s(16-i,j)=count/test_max;

        % Held-out evaluation. This uses x_test, y_N_t and w_bar left over
        % from the LAST trial of the loop above.
        [H_matrix_pt, pred] = Conv_sigmoid_FCN( x_test, reshape(w_bar, [d,K]) );
        err=cross_entropy_loss(y_N_t, pred);
        err_s(16-i,j)=err;   % keep the loss for every (i, j) instead of only the last one

        %l0=length(find(y_N_t==0));
        %ind0=find(y_N_t==0);
        %err0=cross_entropy_loss(y_N_t(ind0), pred(ind0))
        %l1=length(find(y_N_t==1));
        %ind1=find(y_N_t==1);
        %err1=cross_entropy_loss(y_N_t(ind1), pred(ind1))
    end
end
