% \documentclass{uai2022} % for initial submission
\documentclass[accepted]{uai2022} % after acceptance, for a revised
                                    % version; also before submission to
                                    % see how the non-anonymous paper
                                    % would look like
%% There is a class option to choose the math font
% \documentclass[mathfont=ptmx]{uai2022} % ptmx math instead of Computer
                                         % Modern (has noticable issues)
% \documentclass[mathfont=newtx]{uai2022} % newtx fonts (improves upon
                                          % ptmx; less tested, no support)
% NOTE: Only keep *one* line above as appropriate, as it will be replaced
%       automatically for papers to be published. Do not make any other
%       change above this note for an accepted version.

%% Choose your variant of English; be consistent
\usepackage[american]{babel}
% \usepackage[british]{babel}

%% Some suggested packages, as needed:
\usepackage{natbib} % has a nice set of citation styles and commands
    \bibliographystyle{plainnat}
    \renewcommand{\bibsection}{\subsubsection*{References}}
\usepackage{mathtools} % amsmath with fixes and additions
% \usepackage{siunitx} % for proper typesetting of numbers and units
\usepackage{booktabs} % commands to create good-looking tables
\usepackage{tikz} % nice language for creating drawings and diagrams


\usepackage{color}
\usepackage{multirow}
\usepackage{array, boldline, makecell, booktabs}
%% Provided macros
% \smaller: Because the class footnote size is essentially LaTeX's \small,
%           redefining \footnotesize, we provide the original \footnotesize
%           using this macro.
%           (Use only sparingly, e.g., in drawings, as it is quite small.)

%% Self-defined macros
\newcommand{\swap}[3][-]{#3#1#2} % just an example

\title{Supplementary Materials for Paper: Systematized Event-Aware Learning for Multi-Object Tracking}

% The standard author block has changed for UAI 2022 to provide
% more space for long author lists and allow for complex affiliations
%
% All author information is automatically removed by the class for the
% anonymous submission version of your paper, so you can already add your
% information below.
%
% Add authors
\author[1]{\href{mailto:<lhmin@postech.ac.kr>?Subject=Your UAI 2022 paper}{Hyemin Lee}{}}
\author[1]{Daijin Kim}
% Add affiliations after the authors
\affil[1]{%
    Department of Computer Science and Engineering\\
    Pohang University of Science and Technology\\
    Pohang, Korea
}
  
  \begin{document}
\maketitle
This supplementary material contains a more detailed illustration of the methods, implementation details, and the network architecture used in the proposed method.
In addition, we provide further experiments based on various affinity functions.

\begin{figure*}[t]
	\begin{center}
		\includegraphics[width=1\linewidth]{images/suppfig1targetlabel.pdf}
	\end{center}
	\caption{Process of assigning an event to each detection box by associating it with the ground-truth boxes. If a ground-truth box is undetected in the previous frame, we do not care about that box. If the box is detected in the previous frame and undetected in the current frame, it is considered a missing target. If the target appears in the current frame, it is considered a new target appearance. A target whose ground-truth box disappears in the current frame is considered a disappeared target. The remaining detections that are not associated in either the previous or the current frame are treated as false positives (FPs).}
	\label{fig:targetlabel}
\end{figure*}


\begin{figure*}[t]
	\begin{center}
		\includegraphics[width=1\linewidth]{images/suppfig2targetlabel_example.pdf}
	\end{center}
	\caption{Example of the target state table for each target.}
	\label{fig:targetlabel_example}
\end{figure*}




\section{Example of the target state table for each target}
Figure~\ref{fig:targetlabel} shows all possible cases of the target state and the corresponding event categorization.
Figure~\ref{fig:targetlabel_example} shows an example of the target states for the ground-truth bounding boxes and the corresponding detection boxes.



\begin{figure*}[t]
	\begin{center}
		\includegraphics[width=1\linewidth]{images/suppfig3network.pdf}
	\end{center}
	\caption{Network architectures of the proposed method.}
	\label{fig:architecture}
\end{figure*}

\section{Implementation Details}
For reproducibility, here we provide additional implementation details including network architecture and hyperparameters.

The proposed method was implemented using PyTorch 1.3 and tested on a six-core Intel i7@3.60 GHz CPU and an NVIDIA Titan Xp GPU.
Training requires 4.6\,GB of GPU memory and takes about 22~hours for 40 training epochs.

\subsection{Network Architecture}
We used an R-FCN architecture with SqueezeNet as the backbone network for the MOTDT baseline and used the Faster R-CNN detector with ResNet-101 and feature pyramid networks (FPNs)~\citep{lin2017feature} as the backbone network for the Tracktor baseline.
The classification threshold for target initialization was set to 0.3, and the maximum loss time for termination was set to 30 frames.
The association network for MOTDT uses GoogLeNet for the association features, and the association network for the Tracktor baseline was implemented based on the Siamese CNN architecture trained on TriNet~\citep{hermans2017defense} using ResNet-50.
We followed the same tracking management strategy as the baseline tracker, excluding the association and training steps.
The minimum threshold value for the filtering candidates was set to 0.4.
We terminate a missing target after 30 frames without association with any candidate.
The network was trained using stochastic gradient descent over 40 epochs with learning rates ranging from $10^{-3}$ to $10^{-5}$.
The detailed network architectures are illustrated in Figure~\ref{fig:architecture}.





\subsection{Tracking Management}
We followed the same tracking management strategy as the baseline tracker, excluding the association and training steps.
The minimum threshold value for filtering candidates was set to 0.4.
We limited the possible change in location to 1/10 of the diagonal length of the frame, and the possible size change to 1/3 of the previous target size.
We terminate a missing target after 30 frames without association with any candidate.




\subsection{Training}
The network was trained using stochastic gradient descent over 40 epochs with learning rates ranging from $10^{-3}$ to $10^{-5}$.
We generated training samples from the 2D MOT2015 and MOT2017 training sets and split them into seven folds to train the network.
We randomly selected two consecutive frames and ran the simulation trackers to generate samples.
We fixed the network weights of the DHN and the re-identification layer of the baseline tracker.
We set $\alpha$ for the event-aware loss to 0.5 and $\beta$ to 2.

\section{Experiments}
In this section, we provide additional experimental results, including an evaluation on the MOT2020 dataset and test results for various affinity functions and loss hyperparameters.

\subsection{Evaluation on MOT2020 Dataset}
The MOT2020 test dataset contains four test sequences, including densely crowded scenes.
The results obtained for the MOT2020 test dataset are reported in Table~\ref{table3}.



\begin{table*}
	
	\caption{Tracking Performance on the MOT2020 benchmark test set. Best in bold.}
	\label{table3}
	\begin{center}
		{
			\begin{tabular}{l|ccccccc}
				\Xhline{1pt}
				\textbf{Method} &\textbf{MOTA$\uparrow$}  &\textbf{IDF1$\uparrow$} &\textbf{ MT$\uparrow$ }&\textbf{ ML$\downarrow$}&\textbf{ FP$\downarrow$}&\textbf{ FN$\downarrow$}&\textbf{ IDS$\downarrow$}\\
				\hline
				\hline \hline
				
				GNNMatch &{54.5}&49.0 &\textbf{32.8}&25.5 &9522&{223611}&2038\\
				Tracktor++v2 &52.6 &\textbf{52.7} &29.4 &26.7 &\textbf{6930}&236680&\textbf{1648}\\
				SEAT (Tracktor++v2)&\textbf{54.9} &51.3&{32.2} &\textbf{24.1}&8509&\textbf{223105}&1877\\
				\Xhline{1pt}
		\end{tabular}}
	\end{center}
\end{table*}


\subsection{Test Results of Various Loss Weighting Factors}
We trained the proposed network using various loss weighting factors.
The results show how each weighting factor affects each evaluation metric, especially the FP and FN terms.
The results indicate that balanced weights yield good results and that the FN factor has a higher impact than the FP factor.
The results are shown in Table~\ref{table2}.


\begin{table*}
	
	\caption{Training results of various loss weighting factors. Best in bold.}
	\label{table2}
	\begin{center}
		
		\begin{tabular}{c |c c c c c c c}
			\Xhline{1pt}
			\textbf{Loss weight} &\textbf{ MOTA$\uparrow$ }  &\textbf{IDF1$\uparrow$} &\textbf{ MT$\uparrow$ }&\textbf{ ML$\downarrow$}&\textbf{ FP$\downarrow$}&\textbf{ FN$\downarrow$}&\textbf{ IDS$\downarrow$}\\
			\hline
			
			\hline
			$\alpha=0,  \beta=0$ & 67.8  & 66.0  & 43.7  & \textbf{16.1}  & {2771} & 32955 & 425\\ 			
			$\alpha=0.5,  \beta=0$ & 68.1  & 68.1  & 42.3  & 17.4  & {1007} & 34432 & 371\\	
			
			$\alpha=1,  \beta=0$ & 67.6  & 69.0  & 39.9  & 17.0  & \textbf{715} & 35295 & {337}\\
			\hline	 		
			
			$\alpha=0,  \beta=0.5$ & 68.7  & 69.5  & 42.8  & {17.5}  & 2037 & {32818} & {425}\\	 		
			
			$\alpha=0,  \beta=1$ & 68.9  & 70.2  & \textbf{45.6}  & {16.8}  & 1818 & {32707} & {379}\\
			$\alpha=0,  \beta=2$ & 69.0  & 70.1  & 45.0  & 16.7  & 1904 & 32522 & 305\\	
			\hline
			$\alpha=0.5,  \beta=0.5$ & 69.2  & 69.9  & 45.2  & {16.3}  & 1757 & {32563} & {388}\\	
			$\alpha=0.5,  \beta=2$ &\textbf{69.3}  & \textbf{71.8}  & \textbf{45.6}  & {16.5}  & 2108 & \textbf{32078} & \textbf{297} \\
			\Xhline{1pt}
		\end{tabular}
	\end{center}
\end{table*}


\bibliography{lee_620-supp}



\end{document}
