% \documentclass{uai2024} % for initial submission
% %\documentclass[accepted]{uai2024} % after acceptance, for a revised version; 
% % also before submission to see how the non-anonymous paper would look like 
                        
% %% There is a class option to choose the math font
% % \documentclass[mathfont=ptmx]{uai2024} % ptmx math instead of Computer
%                                          % Modern (has noticeable issues)
% % \documentclass[mathfont=newtx]{uai2024} % newtx fonts (improves upon
%                                           % ptmx; less tested, no support)
% % NOTE: Only keep *one* line above as appropriate, as it will be replaced
% %       automatically for papers to be published. Do not make any other
% %       change above this note for an accepted version.

% %% Choose your variant of English; be consistent
% \usepackage[american]{babel}
% % \usepackage[british]{babel}

% %% Some suggested packages, as needed:
% \usepackage{natbib} % has a nice set of citation styles and commands
%     \bibliographystyle{plainnat}
%     \renewcommand{\bibsection}{\subsubsection*{References}}
% \usepackage{mathtools} % amsmath with fixes and additions
% % \usepackage{siunitx} % for proper typesetting of numbers and units
% \usepackage{booktabs} % commands to create good-looking tables
% \usepackage{tikz} % nice language for creating drawings and diagrams
% \usepackage{multirow}
% \usepackage{multicol}
% %% Provided macros
% % \smaller: Because the class footnote size is essentially LaTeX's \small,
% %           redefining \footnotesize, we provide the original \footnotesize
% %           using this macro.
% %           (Use only sparingly, e.g., in drawings, as it is quite small.)

% %% Self-defined macros
% \newcommand{\swap}[3][-]{#3#1#2} % just an example

% \title{Supplementary Materials of $\boldsymbol{\alpha}$-Former: Local-Feature-Aware (L-FA) Transformer}

% % The standard author block has changed for UAI 2024 to provide
% % more space for long author lists and allow for complex affiliations
% %
% % All author information is automatically removed by the class for the
% % anonymous submission version of your paper, so you can already add your
% % information below.
% %
% % Add authors
% \author[1]{\href{mailto:<jj@example.edu>?Subject=Your UAI 2024 paper}{Jane~J.~von~O'L\'opez}{}}
% \author[1]{Harry~Q.~Bovik}
% \author[1,2]{Further~Coauthor}
% \author[3]{Further~Coauthor}
% \author[1]{Further~Coauthor}
% \author[3]{Further~Coauthor}
% \author[3,1]{Further~Coauthor}
% % Add affiliations after the authors
% \affil[1]{%
%     Computer Science Dept.\\
%     Cranberry University\\
%     Pittsburgh, Pennsylvania, USA
% }
% \affil[2]{%
%     Second Affiliation\\
%     Address\\
%     …
% }
% \affil[3]{%
%     Another Affiliation\\
%     Address\\
%     …
%   }
  
%   \begin{document}

% \begin{appendix}

\section{More Ablation Studies}
\subsection{Influence of the encoder layers and decoder layers}
In Table~\ref{table:2}, we compare the influence of using different numbers of encoder and decoder layers in our architecture. As the number of encoder and decoder layers increases, the performance first improves and then plateaus. We therefore use six encoder layers and three decoder layers in our architecture.

\begin{table}[htbp]
% \setlength\tabcolsep{3pt}
%   \small
%   \footnotesize
  \scriptsize
  \caption{Influence of the number of encoder and decoder layers. The best results are highlighted in \textbf{bold}.}
  % \vspace{10pt}
  \centering
\resizebox{0.47\textwidth}{!}{
\begin{tabular}{c c | c c c | c c c}
    \toprule
      \multirow{2}*{encoder} & \multirow{2}*{decoder} & \multicolumn{3}{c|}{COD10K} & \multicolumn{3}{c}{NC4K} \\
     ~ & ~ & AP & AP50 & AP75 & AP& AP50 & AP75\\
    \midrule
   1 & 3 & 37.256 & 68.755 & 37.982 & 39.453 & 69.538 & 40.453\\
   3 & 1 & 38.453 & 70.188 & 39.423 & 40.020 & 70.358 & 41.168\\
   3 & 3 & 40.421 & 70.861 & 40.453 & 41.093 & 71.592 & 42.048\\
   3 & 6 & 41.424 & \textbf{72.826} & 40.826 & 41.726 & 72.059 & 42.824\\
   6 & 3 & \textbf{42.453} & 72.735 & 41.758 & \textbf{42.936} & \textbf{72.905} & \textbf{43.278}\\
   6 & 6 & 42.187 & 72.682 & 41.744 & 42.921 & 72.723 & 43.168\\
   6 & 9 & 42.424 & 72.672 & \textbf{41.776} & 42.876 & 72.781 & 43.133\\
  \bottomrule
\end{tabular}
}
\label{table:2}
\end{table}

\subsection{Ablation studies using different backbones}
In Table~\ref{table:3}, we compare the performance of different backbones in our architecture.

\begin{table}[htbp]
% \setlength\tabcolsep{3pt}
%   \small
%   \footnotesize
  \scriptsize
  \caption{Comparison of different backbones in our architecture.}
  % \vspace{10pt}
  \centering
\resizebox{0.5\textwidth}{!}{
\begin{tabular}{c | c c c | c c c}
    \toprule
      \multirow{2}*{Backbone} & \multicolumn{3}{c|}{COD10K} & \multicolumn{3}{c}{NC4K} \\
     ~ & AP & AP50 & AP75 & AP& AP50 & AP75\\
    \midrule
   ResNet-50 (default) & 42.453 & 72.735 & 41.758 & 42.936 & 72.905 & 43.278\\
   ResNet-18 & 36.489 & 67.159 & 37.188 & 37.458 & 68.711 & 38.950\\
   ResNet-101 & 43.188 & 73.725 & 42.713 & 43.794 & 72.313 & 44.484\\
   VGG-16 & 37.148 & 68.469 & 37.195 & 39.948 & 69.159 & 40.152\\
  \bottomrule
\end{tabular}
}
\label{table:3}
\end{table}

\section{More Implementation Details}
\subsection{More Details of the Feature Aggregation Adapter}
Our feature aggregation adapter is initialized with tiny weights to guarantee that, at the beginning of training, the output domain is the same as the input image domain. Specifically, we initialize the convolution weights with a mean of 0 and a variance of 0.001, and set the bias of the convolution layer to 0. With the tiny-initialized convolution layer and the skip connection, the output of the adapter is almost identical to its input at the beginning of training.

\subsection{More Details of the Edge-aware Feature Fusion Module} 
In this section, we provide more details about our edge-aware feature fusion module. Our edge-aware feature fusion module uses multi-scale features to predict the boundary of the target object. As shown in Table~\ref{table:1}, we provide the input and output shapes of the different edge prediction blocks.
\linespread{1.5}
\begin{table}[hbp]
    \centering
    \caption{Input and output shapes of the different edge prediction blocks.}
    \begin{tabular}{c|c|c}
    \toprule
    Block & Input Shape & Output Shape\\
    \midrule
    block$_5$ & $\frac{H}{32}\times\frac{W}{32}$ & $\frac{H}{16}\times\frac{W}{16}$\\
    block$_4$ & $\frac{H}{16}\times\frac{W}{16}$ & $\frac{H}{8}\times\frac{W}{8}$\\
    block$_3$ & $\frac{H}{8}\times\frac{W}{8}$ & $\frac{H}{4}\times\frac{W}{4}$\\
    block$_2$ & $\frac{H}{4}\times\frac{W}{4}$ & $\frac{H}{4}\times\frac{W}{4}$\\
    \bottomrule
    \end{tabular}
    \label{table:1}
\end{table}
\linespread{1.0}

\subsection{More Details of the Prediction Head}
In this section, we provide more details about our prediction head. We follow the same architecture as OSFormer~\cite{pei2022osformer}, as shown in Fig.~\ref{fig:head_supp}. During the training process, we use a fully-connected layer to calculate the location label. At the same time, we use a multi-layer perceptron to calculate the instance-aware parameters. We then assign positive and negative locations using the ground truth. During the testing process, we use the confidence score of the location label to filter out ineffective instance-aware parameters. Next, we use two linear layers to calculate the weight and bias used to compute the segmentation mask. Finally, we apply an up-sampling operation to obtain the final prediction masks.

\begin{figure}[htbp]
    \centering
    \includegraphics[width=1.0\linewidth]{Figs/head_supp_cropped.pdf}
    \caption{During the training process, our prediction head uses location labels as supervision, and during the testing process, our prediction head uses location labels to filter ineffective parameters.}
    \label{fig:head_supp}
\end{figure}

\section{More Visualizations}
As shown in Fig.~\ref{fig:vis_supp}, we provide more visualizations in this section.

\begin{figure*}[htbp]
    \centering
    \includegraphics[width=\linewidth]{Figs/vis_supp_cropped.pdf}
    \caption{The qualitative results of $\alpha$-Former.}
    \label{fig:vis_supp}
\end{figure*}

% References
% \bibliography{uai2024-template}


% \end{document}
