function [nn, L]  = nntrain(nn, train_x, train_y, opts, val_x, val_y)
%NNTRAIN trains a neural net
% [nn, L] = nntrain(nn, train_x, train_y, opts) trains the network nn on
% the rows of train_x / train_y for opts.numepochs epochs, using randomly
% permuted minibatches of opts.batchsize rows. size(train_x, 1) must be an
% integer multiple of opts.batchsize.
%
% [nn, L] = nntrain(nn, train_x, train_y, opts, val_x, val_y) additionally
% calls nneval on the training and validation sets after every epoch and
% appends the result to the per-epoch progress message.
%
% Fields read from nn: inputZeroMaskedFraction, top_k, W{2}, L,
% learningRate and scaling_learningRate.
%
% Optional fields of opts:
%   plot    - when equal to 1 a new figure is opened before training; the
%             weight-graph snapshots are drawn into the current figure.
%   verbose - when true, abs(sum(sum(nn.W{2}))) is displayed after every
%             minibatch (divergence monitor). Defaults to off.
%
% Returns the updated network nn and L, a column vector with one entry per
% processed minibatch holding the value of nn.L after that minibatch.

assert(isfloat(train_x), 'train_x must be a float');
assert(nargin == 4 || nargin == 6,'number ofinput arguments must be 4 or 6')

loss.train.e               = [];
loss.train.e_frac          = [];
loss.val.e                 = [];
loss.val.e_frac            = [];
opts.validation = 0;
if nargin == 6
    opts.validation = 1;
end

if isfield(opts,'plot') && opts.plot == 1
    figure();
end

% Per-minibatch diagnostics are opt-in so that normal runs stay quiet.
verbose = isfield(opts, 'verbose') && opts.verbose;

m = size(train_x, 1);

batchsize = opts.batchsize;
numepochs = opts.numepochs;

numbatches = m / batchsize;

assert(rem(numbatches, 1) == 0, 'numbatches must be a integer');

L = zeros(numepochs*numbatches,1);
n = 1;

% Minibatch numbers (within each epoch) at which the thresholded W{2}
% graph is plotted.
snapshotBatches = [1 10000 20000];

for i = 1 : numepochs
    tic;

    % New random sample order for every epoch.
    kk = randperm(m);

    for l  = 1 : numbatches
        batch_idx = kk((l - 1) * batchsize + 1 : l * batchsize);
        batch_x = train_x(batch_idx, :);

        %Add noise to input (for use in denoising autoencoder)
        if(nn.inputZeroMaskedFraction ~= 0)
            batch_x = batch_x.*(rand(size(batch_x))>nn.inputZeroMaskedFraction);
        end

        batch_y = train_y(batch_idx, :);

        nn = nnff2(nn, batch_x, batch_y, l);
        nn = nnbp(nn);
        nn = nnapplygrads(nn);

        % The sparsified weight graph is only needed for the snapshots, so
        % the sort/threshold work is skipped on every other minibatch.
        if any(l == snapshotBatches)
            % Drop the first column of W{2} (presumably the bias column -
            % TODO confirm against nnff2). graph() needs the remainder to
            % be square.
            w = nn.W{2}(:, 2:end);
            [nRows, nCols] = size(w);

            % Zero the main diagonal via linear indices.
            nDiag = min(nRows, nCols);
            w((1:nDiag) + ((1:nDiag) - 1) * nRows) = 0;

            % Keep only the entries whose magnitude reaches that of the
            % floor(numel * nn.top_k)-th largest entry; all others stay 0.
            ww = zeros(nRows, nCols);
            mags = sort(abs(w(:)), 'descend');
            if ~isempty(mags)
                % Clamp the rank so a very small or very large top_k
                % cannot index outside 1..numel(mags).
                k = min(max(floor(numel(mags) * nn.top_k), 1), numel(mags));
                keep = abs(w) >= mags(k);
                ww(keep) = w(keep);
            end

            % Only the upper triangle of ww is used as adjacency matrix.
            plot(graph(ww, 'upper'));
        end

        L(n) = nn.L;
        n = n + 1;

        if verbose
            disp(abs(sum(sum(nn.W{2}))));
        end
    end

    t = toc;

    if opts.validation == 1
        loss = nneval(nn, loss, train_x, train_y, val_x, val_y);
        str_perf = sprintf('; Full-batch train mse = %f, val mse = %f', loss.train.e(end), loss.val.e(end));
    else
        str_perf = '';
    end

    % One progress line per epoch, including the validation summary when
    % it was computed above.
    disp(['epoch ' num2str(i) '/' num2str(opts.numepochs) '. Took ' num2str(t) ' seconds' '. Mean of nn.L over this epoch''s minibatches is ' num2str(mean(L((n-numbatches):(n-1)))) str_perf]);

    % Learning-rate schedule: rescale once per epoch.
    nn.learningRate = nn.learningRate * nn.scaling_learningRate;
end


