function [Cost,dCostdTheta] = cogNet_BatchCost(Theta,uList,zTargetList,maskList,actFun,gamma,NeuralNoiseStd,L2regScl)
%COGNET_BATCHCOST Masked squared-error cost (plus optional regularization)
%   of a recurrent network rolled out over a batch, and its gradient.
%
%   [Cost,dCostdTheta] = cogNet_BatchCost(Theta,uList,zTargetList,maskList,
%                          actFun,gamma,NeuralNoiseStd,L2regScl)
%
%   Inputs
%     Theta          - packed parameter matrix with hDim rows, laid out
%                      column-wise as [Wrec | Win | Bin | Wout' | Bout]:
%                        Wrec  : hDim x hDim
%                        Win   : hDim x uDim
%                        Bin   : hDim x 1
%                        Wout' : hDim x zDim
%                        Bout  : first zDim rows of the last column
%                      Must be a traced dlarray for dlgradient to work
%                      (i.e. this function is expected to be run through
%                      dlfeval -- confirm against the caller).
%     uList          - uDim x batchSz x numSteps inputs.
%     zTargetList    - target outputs, indexed (:,:,k) per time step.
%     maskList       - weights multiplied element-wise with the squared
%                      error at each step, indexed (:,:,k).
%     actFun, gamma, NeuralNoiseStd
%                    - passed through unchanged to cogNet (the one-step
%                      state update, defined outside this file).
%     L2regScl       - regularization scale; when zero/false no
%                      regularization term is added.
%
%   Outputs
%     Cost           - sum of masked squared errors divided by batchSz,
%                      plus L2regScl*(activity norm + weight norm) when
%                      L2regScl is nonzero.
%     dCostdTheta    - dlgradient of Cost with respect to Theta.

hDim = size(Theta,1);
[uDim,batchSz,numSteps] = size(uList);

% Output dimension implied by Theta's layout: whatever columns are left
% after Wrec (hDim), Win (uDim), Bin (1) and the trailing Bout column (1).
zDim = size(Theta,2) - hDim - uDim - 2;
if zDim < 1 || zDim > hDim
    error('cogNet_BatchCost:badThetaSize', ...
        'Theta has %d columns; with hDim=%d and uDim=%d this implies an output dimension of %d, which must be between 1 and hDim.', ...
        size(Theta,2), hDim, uDim, zDim);
end

%% unpacking
Wrec = Theta(:,1:hDim);
Win  = Theta(:,hDim+1:hDim+uDim);
Bin  = Theta(:,hDim+uDim+1);
Wout = Theta(:,hDim+uDim+2:end-1)';
% Bias length follows the output dimension instead of a hard-coded 3, so a
% mismatched size can no longer be hidden by implicit expansion in Wout*h+Bout.
Bout = Theta(1:zDim,end);

%% simulate the network
% Hidden state starts at zero (the reference paper did not specify an
% initialization, so zero was chosen).
h = zeros(hDim,batchSz);
actPenalty = zeros(1,batchSz);   % per-sample sum of squared activity over time
errorPenalty = 0;                % masked squared error summed over everything

for k = 1:numSteps
    u = uList(:,:,k);
    h = cogNet(h,u,Win,Wrec,Bin,actFun,gamma,NeuralNoiseStd);
    z = Wout*h + Bout;
    errSqr = (z - zTargetList(:,:,k)).^2;

    % Sum over units explicitly (dim 1); the default dimension would sum
    % across the batch instead when hDim == 1.
    actPenalty = actPenalty + sum(h.^2,1);
    errorPenalty = errorPenalty + sum(maskList(:,:,k).*errSqr,"all");
end

%% calculate cost and gradient
Cost = errorPenalty/batchSz;
if L2regScl
    % The 1e-7 offsets keep the square roots (and their derivatives)
    % finite when the argument is exactly zero.
    actNorm = sum(sqrt(actPenalty + 1e-7))/batchSz;
    weightNorm = sqrt(sum([Win Wrec Wout'].^2,"all") + 1e-7);
    Cost = Cost + L2regScl*(actNorm + weightNorm);
end

dCostdTheta = dlgradient(Cost,Theta);

end