function [Cost,dCostdTheta,RMSE,RMSE_TPsplit,flexPenalty,svPenalty,sensePenalty,signalPenalty,actPenalty] = ...
    computeBatchCostTmaze(Theta,uList,zTargetList,fixationList, TPindicatorList, ...
    actFun,gamma,NeuralNoiseStd,signalRegWeight,senseRegWeight,actRegWeight,svRegWeight,flexConWeight,deltaT,tpIDs,DriftStd,neurDistMat,origDistMat)
%COMPUTEBATCHCOSTTMAZE Simulate the recurrent network on a batch and return cost and gradient.
%
%   Rolls the network forward for numSteps steps, integrates the output
%   z = z + (Wout*h+Bout)*deltaT + drift noise, accumulates the squared
%   tracking error against zTargetList (overall and split by task period),
%   adds the enabled regularisation penalties, and differentiates the total
%   cost with respect to Theta using dlgradient.
%
%   Inputs (sizes as used by the code below):
%     Theta            packed parameters; size(Theta,1) is the hidden dimension
%                      hDim. The layout is whatever unpackTheta expects.
%     uList            uDim x batchSz x (numSteps+1) inputs.
%     zTargetList      zDim x batchSz x (numSteps+1) target outputs; slice 1
%                      is used as the initial output state.
%     fixationList     fixDim x batchSz x (numSteps+1) fixation inputs.
%     TPindicatorList  rows x batchSz x (numSteps+1) task-period descriptors.
%                      Row 1 and row end-1 of the LAST time slice are used as
%                      column indices into the unpacked h0 (start point and
%                      end point of each trial).
%     actFun           activation function handle.
%     gamma, NeuralNoiseStd   forwarded unchanged to TmazeNet.
%     signalRegWeight, senseRegWeight, actRegWeight, svRegWeight,
%     flexConWeight    penalty weights; a weight of 0 skips that penalty.
%     deltaT           integration step for the output.
%     tpIDs            rows x numTPs; each column is matched against the
%                      columns of TPindicatorList(:,:,t).
%     DriftStd         std of output drift noise (divided by sqrt(zDim) here).
%     neurDistMat, origDistMat  distance matrices used to weight the
%                      signal / sensory penalties.
%
%   Outputs:
%     Cost           RMSE plus the weighted sum of all penalties.
%     dCostdTheta    dlgradient(Cost,Theta).
%     RMSE           sqrt(MSE+1e-8) over every counted sample.
%     RMSE_TPsplit   1 x numTPs per-task-period RMSE (NaN where a task period
%                    does not occur in this batch).
%     flexPenalty, svPenalty, sensePenalty, signalPenalty, actPenalty
%                    unweighted penalty values (0 when the weight is 0).
%
%   NOTE(review): dlgradient and extractdata are used, so this presumably has
%   to be called through dlfeval with Theta as a traced dlarray - confirm at
%   the call site.

% Move the task-period axis to dim 3 so that tpIDs broadcasts against the
% rows x batchSz slices of TPindicatorList.
tpIDs=permute(tpIDs,[1 3 2]);
% Count task periods along dim 3 explicitly: length() would return the
% largest dimension, which is wrong when rows > numTPs.
numTPs=size(tpIDs,3);

%% getting the dimensions of things
hDim=size(Theta,1);
[uDim,batchSz,numSteps]=size(uList);
numSteps=numSteps-1; % the lists hold numSteps+1 time slices
zDim = size(zTargetList,1);
fixDim=size(fixationList,1);

%% unpacking theta
% h0Table holds one candidate initial state per column; it is kept separate
% from the per-trial h0 so that the end-point lookup below indexes the
% table rather than the batch.
[Win,Wrec,Bin,Wfeedback,Wfixation,Wout,Bout,h0Table] = unpackTheta(Theta,hDim,uDim,zDim,fixDim);
startPtNums=TPindicatorList(1,:,end);
endPtNums=TPindicatorList(end-1,:,end);
h0=actFun(h0Table(:,startPtNums)); % make sure starting activations are positive
if flexConWeight
    % Target for the final state: the (activated) initial state associated
    % with the point each trial ends on.
    hfTarget=actFun(h0Table(:,endPtNums));
end

%% simulate the network
% initialize h, which is the neural state
h = h0;
z = zTargetList(:,:,1); % initial state for position % manoj to do: should we add noise to the initial position?

numel_TPsplit=zeros(numTPs,1);
SSE_TPsplit=numel_TPsplit;
actPenalty=0;
signalPenalty=0;
sensePenalty=0;
svPenalty=0;
flexPenalty=0;
if signalRegWeight
    % distance-weighted absolute weights, loop invariant
    WrecDist=abs(Wrec.*neurDistMat);
    WoutDist=abs(Wout.*origDistMat');
end

% get h trajectory (neural trajectory)
DriftStd=DriftStd/sqrt(zDim);
for iStep=1:numSteps
    u = uList(:,:,iStep); % input for this timestep
    fixationNow = fixationList(:,:,iStep);
    % The output increment uses the neural state from BEFORE this step's update.
    dz=(Wout*h + Bout)*deltaT;
    h = ...
        TmazeNet(h,u,z,fixationNow,Win,Wrec,Bin,Wfeedback,Wfixation, ...
        actFun,gamma,NeuralNoiseStd); % recurrent neural network

    % increment penalties
    if actRegWeight
        actPenalty=actPenalty+sum(abs(h),"all")/hDim/batchSz;
    end
    if signalRegWeight
        recSigPen= sum(WrecDist*h,"all");
        signalPenNow=( recSigPen...
            + sum(WoutDist*h,"all") )/hDim/batchSz;
        signalPenalty=signalPenalty+signalPenNow;
    end

    % get output from neural state
    z = z+dz+DriftStd*randn(zDim,batchSz);
    % "if isnan(z)" on a matrix is only true when EVERY element is NaN;
    % report as soon as any element has gone NaN.
    if any(isnan(z),"all")
        disp('went unstable in z')
    end

    % squared error per trial at the next time slice (1 x batchSz)
    SE=sum((z-zTargetList(:,:,iStep+1)).^2);

    % increment SSE per task period
    % tpMask is 1 x batchSz x numTPs: true where a trial's indicator column
    % equals the corresponding tpIDs column in every row.
    tpMask=all(TPindicatorList(:,:,iStep+1)==tpIDs,1);
    SE_TPsplit=permute(tpMask.*SE,[3 2 1]); % numTPs x batchSz
    if any(isnan(SE_TPsplit),"all")
        disp('tpSplit went Nan')
    end
    SSE_TPsplit=SSE_TPsplit+sum(SE_TPsplit,2) ;
    % Count samples from the mask itself, so a sample whose error happens to
    % be exactly zero is still counted in the denominator.
    numel_TPsplit=numel_TPsplit+sum(permute(tpMask,[3 2 1]),2);
end

%% calculate cost and gradient
% get RMSE (performance cost)
SSE=sum(SSE_TPsplit);
MSE=SSE/sum(numel_TPsplit);
RMSE=sqrt(MSE+1e-8); % small offset keeps the sqrt differentiable at zero error
% get sensory penalty
if senseRegWeight
    sensePenalty=sum(abs([Win Wfixation Wfeedback].*origDistMat),"all")/hDim;
end
% normalize the act and signal penalties to get averages over batch/time
if actRegWeight
    actPenalty=actPenalty/numSteps;
end
if signalRegWeight
    signalPenalty=signalPenalty/numSteps;
end
% get max SV penalty
if svRegWeight
    % The leading singular vectors are computed on the raw data and treated
    % as constants; U'*Wrec*V then yields the largest singular value as an
    % expression of Wrec.
    [U,~,V]=svds(extractdata(Wrec),1);
    svPenalty=(U'*Wrec*V)^2;
end
% get the penalty for ending neural activations not matching the starting
% neural activation of the point we finished on, which should encourage a
% flexible solution (we could go into another trial right after this
% one without resetting the neural state, as long as the next trial starts
% where we just finished)
if flexConWeight
    flexPenalty=mean((h-hfTarget).^2,"all");
end
% combine all penalties to get the overall cost
Cost=RMSE+signalRegWeight*signalPenalty+senseRegWeight*sensePenalty...
    +actRegWeight*actPenalty + svRegWeight*svPenalty + flexConWeight*flexPenalty;

dCostdTheta=dlgradient(Cost,Theta);

RMSE_TPsplit=sqrt(SSE_TPsplit./numel_TPsplit)'; % gives NaNs for task periods not represented in this batch

end