function[W,H,loss] = NMF(X, k, lamda1, lamda2, alpha)
%NMF Weighted, regularized non-negative matrix factorization X ~ W*H.
%
%   [W,H,loss] = NMF(X, k, lamda1, lamda2, alpha) factorizes X with
%   multiplicative updates, starting from a random initialization.
%
%   The objective that is tracked in loss is
%       0.5*||P.*(X - W*H)||_F^2
%     + 0.5*lamda2*(||W||_F^2 + ||H||_F^2)
%     + lamda1*(sum(|W(:)|) + sum(|H(:)|))
%   where P is 1 on the entries with X > 0 and alpha everywhere else.
%
% Inputs
%   X      - n-by-m data matrix to decompose
%   k      - desired embedding length (number of latent factors)
%   lamda1 - l1 regularization weight
%   lamda2 - l2 regularization weight
%   alpha  - confidence in the zeros: weight given to entries of X that
%            are not strictly positive
%
% Outputs
%   W    - n-by-k matrix; its rows are the low-dimensional representations
%          of the objects in the rows of X
%   H    - k-by-m matrix; its columns are the low-dimensional
%          representations of the objects in the columns of X
%   loss - row vector; loss(t) is the objective value after iteration t
%
% The iteration stops once the largest relative change of both W and H is
% at most 1e-3, or after 9999 iterations.

[n,m] = size(X);
sqrteps = sqrt(eps);   % small constant guarding the divisions below
tol = 1e-3;            % threshold on the relative change of W and H
maxIter = 9999;        % hard cap on the number of iterations

% Random non-negative start; every row of H0 is scaled to unit l2 norm.
W0 = 0.1*rand(n,k);
H0 = 0.1*rand(k,m);
H0 = H0./repmat(sqrt(sum(H0.^2,2)),1,m);

% Confidence weights: 1 where X > 0, alpha everywhere else.
P = zeros(n,m) + alpha;
P(X>0) = 1;

% Both quantities are constant over the iterations, so compute them once.
P2 = P.^2;
num = P2.*X;

loss = zeros(1,maxIter);   % preallocated, trimmed after the loop
i = 1;
delta = 1;

while (delta>tol && i<=maxIter)

    % Multiplicative updates; H is updated using the freshly updated W.
    W = max(0,(W0.*(num*H0'))./((P2.*(W0*H0))*H0' + lamda2*W0 + lamda1*sign(W0) + sqrteps));
    H = max(0,(H0.*(W'*num))./(W'*(P2.*(W*H0)) + lamda2*H0 + lamda1*sign(H0) + sqrteps));

    % Largest absolute change relative to the largest previous entry.
    delta_W = max(max(abs(W - W0)))/(sqrteps + max(max(abs(W0))));
    delta_H = max(max(abs(H - H0)))/(sqrteps + max(max(abs(H0))));

    % Objective at the updated factors. The l1 term is the entrywise sum of
    % absolute values: norm(A,1) on a matrix would instead return the
    % maximum absolute column sum, which is not the penalty whose gradient
    % (lamda1*sign(.)) appears in the update rules above.
    loss(i) = 0.5*norm(P.*(X-W*H),'fro')^2 ...
            + 0.5*lamda2*(norm(W,'fro')^2 + norm(H,'fro')^2) ...
            + lamda1*(sum(abs(W(:))) + sum(abs(H(:))));

    W0 = W;
    H0 = H;

    i = i + 1;

    delta = max(delta_W, delta_H);

end

% Keep only the entries that were actually written.
loss = loss(1:i-1);

end