%clear all;
% Simulation script: gradient-descent training of a one-hidden-layer sigmoid
% network (via the external helpers Conv_sigmoid_FCN, Gradient_crossentropy
% and cross_entropy_loss) on inputs drawn from a two-component Gaussian
% mixture.  The fraction lm of "wide" samples (std 10 instead of 1) in the
% training set is swept over LM.
%
% Outputs left in the workspace:
%   sample_s(16-i, j) - fraction of trials whose runs agreed (see NOTE below)
%   test_err(16-i, j) - cross-entropy test loss of the averaged weights for
%                       the last trial of sample-size index i and LM(j)
%   err               - test loss of the most recent (i, j) combination

% ------------------------------------------------------------------------
% Parameters
% ------------------------------------------------------------------------
L=2;                        % not used by this script
sample_s=zeros(15,20);      % success-rate table, filled at (16-i, j)
test_err=zeros(15,20);      % test-loss table, same layout as sample_s
delta_=20000;               % training-set size increment (N0 = i*delta_)
runs=1;                     % random initialisations per trial
d=5;                        % input dimension
test_max=1;                 % independent trials per (i, j) setting
K=3;                        % number of nodes in the hidden layer
N_test=1000;                % number of test samples
loop_n=2000;                % gradient-descent iterations per run
W = 1 * randn( d, K );      % ground-truth weights, shared by all settings

SG=[0.1,0.159,0.251, 0.398, 0.631, 1, 1.58, 2.51, 3.98, 6.31, 10]; % not used by this script
LM=[0,0.05,0.1,0.15,0.2,0.25,0.3];  % fractions of wide-component samples

for j=1:numel(LM)
    lm=LM(j)  %#ok<NOPTS> intentionally echoed as progress output
    for i=1:1
        N0 = i*delta_;          % number of training samples
        eta = 0.1/(1+100*lm);   % gradient-descent step size, shrinks with lm

        count=0;
        for test=1:test_max
            % --- Training inputs: mixture of N(0,1) and N(0,10^2) --------
            % Derive one component size from the other so the total is
            % exactly N0 even when N0*lm is not an integer.
            n_wide=round(N0*lm);
            n_narrow=N0-n_wide;
            x_1=randn(d, n_narrow);
            x_2=10*randn(d, n_wide);
            x_N=cat(2,x_1,x_2);
            r=randperm(size(x_N,2));
            x_N=x_N(:,r);
            n_train=size(x_N,2);

            % --- Test inputs: fixed 80/20 mixture ------------------------
            n_narrow_t=round(N_test*0.8);
            n_wide_t=N_test-n_narrow_t;
            x_1_t=randn(d, n_narrow_t);
            x_2_t=10*randn(d, n_wide_t);
            x_test=cat(2,x_1_t,x_2_t);
            rt=randperm(size(x_test,2));
            x_test=x_test(:,rt);
            n_test=size(x_test,2);

            %{
            Alternative test set drawn from the wide component only:
            x_2_t=10*randn(d, N_test);
            x_test=x_2_t;
            rt=randperm(size(x_test,2));
            x_test=x_test(:,rt);
            %}

            % --- Labels ---------------------------------------------------
            % The second output of Conv_sigmoid_FCN is used as the
            % per-sample Bernoulli success probability.
            [H_matrix, H_FCN] = Conv_sigmoid_FCN( x_N, W );
            y_N=zeros(1,n_train);
            y_N(:)=binornd(1,H_FCN(1:n_train));

            [H_matrix_t, H_FCN_t] = Conv_sigmoid_FCN( x_test, W );
            y_N_t=zeros(1,n_test);
            y_N_t(:)=binornd(1,H_FCN_t(1:n_test));

            % --- Gradient descent from perturbed initialisations ---------
            W_out=zeros(d*K,runs);
            for run=1:runs
                % Start at relative Frobenius distance 0.1 from W.
                temp = randn( d , K );
                W_0 = W + 0.1 * norm( W , 'fro' ) * temp / norm( temp ,  'fro' );
                W_t = W_0;
                %eta=10/(1+sg^2);
                errit=zeros(loop_n,1);  % relative error per iteration
                for l = 1 : loop_n
                    GD=Gradient_crossentropy(x_N,y_N,W_t);
                    W_t = W_t - eta * GD;
                    errit( l ) = norm( W-W_t , 'fro') / norm( W , 'fro');
                    %if isnan(errit(l))
                    %    break;
                    %end
                end
                W_out(:,run)=W_t(:);
            end

            % Spread of the runs around their mean.
            % NOTE: with runs == 1 the mean equals the single run, so Error
            % is always 0 and every trial is counted as a success.
            w_bar=mean(W_out,2);
            W_bar=repmat(w_bar,1,runs);
            Error=norm(W_bar-W_out,'fro')/sqrt(runs);
            if Error<=1e-3
               count=count+1;
            end
        end
        sample_s(16-i,j)=count/test_max;

        % Test loss of the averaged weights from the last trial.
        [H_matrix_pt, pred] = Conv_sigmoid_FCN( x_test, reshape(w_bar, [d,K]) );
        err=cross_entropy_loss(y_N_t, pred);
        test_err(16-i,j)=err;   % keep every setting's loss, not just the last

        %l0=length(find(y_N_t==0))
        %ind0=find(y_N_t==0);
        %err0=cross_entropy_loss(y_N_t(ind0), pred(ind0))
        %l1=length(find(y_N_t==1))
        %ind1=find(y_N_t==1);
        %err1=cross_entropy_loss(y_N_t(ind1), pred(ind1))
    end
end

