function [s,sd,sf,sx1,sx2] = newGP_kernel_multi_layer(sp,sx1p,sx2p,param,l)
%NEWGP_KERNEL_MULTI_LAYER  Run the per-time-step kernel recursion for layer l.
%
%   Inputs
%     sp    - cross-kernel sequence of the previous layer; entries 1..T1 are
%             read, where T1 = length(sx1p).
%     sx1p  - previous-layer diagonal sequence of the first input (length T1).
%     sx2p  - previous-layer diagonal sequence of the second input (length T2).
%     param - struct with fields sigmaw, sigmau, sigmab (indexed by l),
%             sigmav, sigmah (used as given) and nonlinearity (forwarded
%             unchanged to act).
%     l     - index into param.sigmaw / param.sigmau / param.sigmab.
%
%   Outputs
%     s     - T1-by-1 cross sequence for this layer.
%     sd    - T1-by-1 vector holding the second output of act evaluated at
%             (s(t), sx1(t), sx2(t+u)).
%     sf    - sigmav^2 times the first output of act at the last step.
%     sx1   - T1-by-1 diagonal sequence of the first input for this layer.
%     sx2   - T2-by-1 diagonal sequence of the second input for this layer.
%
%   The two sequences are aligned at their ends: step t of the first input
%   is paired with step t+u of the second, where u = T2 - T1. The indexing
%   therefore needs T2 >= T1 (otherwise 1+u is not a valid index).
%
%   act is defined outside this file. Presumably its first output is the
%   expected product of activations and its second the expected product of
%   their derivatives - TODO confirm against act.

nSteps1 = length(sx1p);
nSteps2 = length(sx2p);

% Squared hyper-parameters, read in a fixed order and computed once.
sw2 = param.sigmaw(l)^2;
sv2 = param.sigmav^2;
su2 = param.sigmau(l)^2;
sb2 = param.sigmab(l)^2;
sh2 = param.sigmah^2;
nl  = param.nonlinearity;

% Offset that lines up the final steps of both sequences.
shift = nSteps2 - nSteps1;

% Diagonal (same-input) recursions; identical procedure for both inputs.
sx1 = localDiagonalChain(sx1p,sw2,su2,sb2,sh2,nl);
sx2 = localDiagonalChain(sx2p,sw2,su2,sb2,sh2,nl);

% Cross recursion between the two inputs.
s  = zeros(nSteps1,1);
sd = zeros(nSteps1,1);
for t = 1:nSteps1
    j = t + shift;                       % partner step in the second input
    if t == 1
        % NOTE(review): unlike sx1(1)/sx2(1), this initial cross term has
        % no sigmah^2*sigmaw^2 contribution - confirm that is intended,
        % in particular for shift == 0.
        [inTerm,~] = act(sp(1),sx1p(1),sx2p(j),nl);
        s(1) = su2*inTerm + sb2;
    else
        [recTerm,~] = act(s(t-1),sx1(t-1),sx2(j-1),nl);
        [inTerm,~]  = act(sp(t),sx1p(t),sx2p(j),nl);
        s(t) = sw2*recTerm + su2*inTerm + sb2;
    end
    [~,sd(t)] = act(s(t),sx1(t),sx2(j),nl);
end

% Output term built from the last aligned step.
[lastTerm,~] = act(s(nSteps1),sx1(nSteps1),sx2(nSteps1+shift),nl);
sf = sv2*lastTerm;
end

function v = localDiagonalChain(vPrev,sw2,su2,sb2,sh2,nl)
% Diagonal recursion for a single input: every act call receives the same
% value in all three kernel arguments. The first step adds the
% sigmah^2*sigmaw^2 term; later steps feed the previous value of v back in.
n = length(vPrev);
v = zeros(n,1);
[inTerm,~] = act(vPrev(1),vPrev(1),vPrev(1),nl);
v(1) = su2*inTerm + sb2 + sh2*sw2;
for t = 2:n
    [recTerm,~] = act(v(t-1),v(t-1),v(t-1),nl);
    [inTerm,~]  = act(vPrev(t),vPrev(t),vPrev(t),nl);
    v(t) = sw2*recTerm + su2*inTerm + sb2;
end
end

