\documentclass[accepted]{uai2022}  % for initial submission
% \documentclass[accepted]{uai2022} % after acceptance, for a revised
                                    % version; also before submission to
                                    % see how the non-anonymous paper
                                    % would look like
%% There is a class option to choose the math font
% \documentclass[mathfont=ptmx]{uai2022} % ptmx math instead of Computer
                                         % Modern (has noticable issues)
% \documentclass[mathfont=newtx]{uai2022} % newtx fonts (improves upon
                                          % ptmx; less tested, no support)
% NOTE: Only keep *one* line above as appropriate, as it will be replaced
%       automatically for papers to be published. Do not make any other
%       change above this note for an accepted version.

%% Choose your variant of English; be consistent
\usepackage[american]{babel}
% \usepackage[british]{babel}

%% Some suggested packages, as needed:
\usepackage{natbib} % has a nice set of citation styles and commands
    \bibliographystyle{plainnat}
    \renewcommand{\bibsection}{\subsubsection*{References}}
\usepackage{mathtools} % amsmath with fixes and additions
% \usepackage{siunitx} % for proper typesetting of numbers and units
\usepackage{booktabs} % commands to create good-looking tables
\usepackage{tikz} % nice language for creating drawings and diagrams
\usepackage{graphicx}
\usepackage{amsmath}
\usepackage{amssymb}
\usepackage{booktabs}
\usepackage{graphicx}
\usepackage{multirow}
\usepackage{lipsum}
%% Provided macros
% \smaller: Because the class footnote size is essentially LaTeX's \small,
%           redefining \footnotesize, we provide the original \footnotesize
%           using this macro.
%           (Use only sparingly, e.g., in drawings, as it is quite small.)

%% Self-defined macros
\newcommand{\swap}[3][-]{#3#1#2} % just an example

\title{Cyclic Test Time Augmentation with Entropy Weight Method \\ (Supplementary material)}

% The standard author block has changed for UAI 2022 to provide
% more space for long author lists and allow for complex affiliations
%
% All author information is automatically removed by the class for the
% anonymous submission version of your paper, so you can already add your
% information below.
%
% Add authors
\author[1]{Sewhan Chun}
\author[2]{Jae Young Lee}
\author[2]{Junmo Kim}
% \author[1]{Further~Coauthor}
% \author[3]{Further~Coauthor}
% \author[3,1]{Further~Coauthor}
% Add affiliations after the authors
\affil[1]{%
    NAVER CLOVA\\
    Republic of Korea
}
\affil[2]{%
    SIIT Lab\\
    KAIST\\
    Republic of Korea
}
  
  \begin{document}
\maketitle

\section{Appendix A: Details of TTA policy}
\label{sec: Details of TTA policy}

\subsection{Predefined Transformations}

\begin{figure}[!h!]
    \centering
    \includegraphics[scale=0.32,angle = -90]{images/sub_1.PNG}
    \caption{ \label{figure:predefined_transformations} Illustration of the 12 predefined transformations on an image of a bulbul from ImageNet. Twelve different transformations were used, with the identity transformation serving as the exit signal in cyclic TTA.}
\end{figure}

In this study, we use predefined transformations, similar to the baseline. However, in order to leverage the iterative transformation of the cyclic mechanism, we change the magnitudes and establish a modified set of transformations, as shown in Figure~\ref{figure:predefined_transformations}. The 12 transformations include identity, rotation, zooming, contrast adjustment, sharpness adjustment, and color saturation change. The transformations were performed using functions from the torchvision and PIL libraries. The identity transformation leaves the image unchanged. Rotation refers to a clockwise or counterclockwise rotation with a magnitude of $20$ or $-20$ degrees. The zoom transformation resizes the image and crops its central part. The auto-contrast transformation calculates the color histogram and stretches it to achieve a uniform contrast level. The sharpness transformation adjusts the sharpness: a factor less than 1.0 blurs the edges, and a factor larger than 1.0 sharpens them. The color saturation transformation changes the saturation value of each pixel. For all transformations that require padding, symmetrical padding was used.






\subsection{Conventional Test time augmentation}
The typical conventional TTA includes center crop, horizontal flip, ``5 crops'', and ``10 crops''. Many entries in ImageNet competitions have used such schemes for performance improvement \citep{Alexnet,GoogleNet,VGG,Resnet}. The center crop method uses the central cropped patch of the original image. Horizontal flip uses a horizontally flipped image in addition to the center-cropped image. ``5 crops'' takes the four corner patches and the central patch to cover the entire area of the original image. ``10 crops'' adds 5 more patches, cropped from the horizontally flipped version of the original image, to the original 5 crops, for a total of 10. The demonstration is presented in Figure~\ref{figure:conventional_TTA}.
\begin{figure*}[!h]
    \centering
    \includegraphics[scale=0.3]{images/Conventional Transformation.PNG}
    \caption{ \label{figure:conventional_TTA} Demonstration of the conventional TTA policies. The cost (number of input images) increases from 1 to 2, 5, and 10 as one goes from center crop to horizontal flip, ``5 crops'', and ``10 crops''.}
\end{figure*}


\subsection{ImageNet-C Corruptions}

In this study, we use the ImageNet-C benchmark, a corrupted version of ImageNet by Hendrycks et al.\ \citep{Robustness_Imagenet-C}. ImageNet-C simulates 19 different types of corruptions: Gaussian Noise, Shot Noise, Impulse Noise, Defocus Blur, Glass Blur, Motion Blur, Zoom Blur, Snow, Frost, Fog, Brightness, Contrast, Elastic Transform, Pixelate, JPEG, Speckle, Gaussian Blur, Spatter, and Saturate. Each of these corruptions is simulated at 5 different severities. The performance on this dataset is measured with the mean Corruption Error, $mCE$. The $mCE$ is calculated as the error rate of a neural network relative to the error rate of AlexNet \citep{Alexnet}, averaged over the corruption types. Thus, an $mCE$ of 100\% indicates a neural network that is as robust as AlexNet, and 0\% indicates a perfectly robust neural network. An illustration of all the corruption types on a single image is presented in Figure~\ref{figure:corruption}.

\begin{figure*}[!h!]
    \centering
    \includegraphics[scale=0.37,angle =90]{images/Corruption.PNG}
    \caption{ \label{figure:corruption} Illustration of the 19 different types of corruptions from ImageNet-C with a picture of an eagle.}
\end{figure*}


\section{Appendix B: Difference between Single Iteration and Cyclic Iteration}
\label{sec: Diff}

As explained in the main paper, single iteration and cyclic iteration differ in the number of transformations applied to a single image. While the single method selects one transformation from the set of predefined transformations, the cyclic method can select a combination of multiple transformations for a single image. The illustration is presented in Figure~\ref{figure:difference}.

\section{Appendix C: Oracle-TTA Performance}
\label{sec: Oracle}

In this section, we demonstrate the Oracle-TTA performance on the ImageNet and ImageNet-C datasets. The Oracle is a hypothetical, perfectly trained loss predictor that always identifies which transformation candidate yields the lowest loss value from the target network. The experiment in Table~\ref{table:oracle} suggests the upper bound and the potential room for improvement in the loss prediction pipeline. The cyclic behavior gives the pipeline more opportunities to generate a well-conditioned image, resulting in a lower error rate as the maximum number of iterations increases.


\section{Appendix D: Demonstration of Entropy Weight Method}
\label{sec: EWM}

In this section, we visually demonstrate how EWM corresponds to TTA. EWM calculates the relative entropy of each augmentation, treats it as the uncertainty, and weights each augmentation inversely to its entropy. The importance of each augmentation is represented by its weight value, which corresponds to the ``certainty''. The demonstration of EWM is presented in Figures~\ref{figure:EWM1}, \ref{figure:EWM2}, and~\ref{figure:EWM3}.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\newpage


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{figure*}[!h!]
    \centering
    \includegraphics[scale=0.355,angle =90]{images/Cyclic_Example.PNG}
    \caption{ \label{figure:difference} Illustration of exemplary cases of the loss prediction pipeline. The blue-bordered images refer to the finalized input images produced by the previous method \citep{TTA-Policy_L2T} with a single iteration. The green-bordered images refer to the transformed input images produced by ours.}
\end{figure*}
\newpage
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% \section{Appendix D: CIFAR100 Experiments}
% \label{sec: Diff}

\begin{table*}[ht!]
\centering
\rule{126mm}{1pt}\\%
\def\arraystretch{1.3}
\begin{tabular}{ccccccc}

\multirow{2}{*}{Train Time Augmentation}        & \multirow{2}{*}{TTA Method}                         & \multirow{2}{*}{Cost}   & \multicolumn{2}{c}{Average}                 & \multicolumn{2}{c}{EWM (Ours)} \\ \cline{4-7} 
                                                &                                                     &                         & Clean & \emph{mCE}                                 & Clean     & \emph{mCE}                \\ \hline
\multicolumn{1}{c|}{\multirow{10}{*}{Augmix}} & \multicolumn{1}{c|}{Center Crop}                    & \multicolumn{1}{c|}{1}  & 23.61 & \multicolumn{1}{c|}{37.19}          &           &                    \\
\multicolumn{1}{c|}{}                           & \multicolumn{1}{c|}{Horizontal Flip}                & \multicolumn{1}{c|}{2}  & 22.59 & \multicolumn{1}{c|}{35.96}          & 22.51     & 35.96              \\
\multicolumn{1}{c|}{}                           & \multicolumn{1}{c|}{5 Crops}                        & \multicolumn{1}{c|}{5}  & 23.30 & \multicolumn{1}{c|}{35.79}          & 23.17     & 35.61              \\
\multicolumn{1}{c|}{}                           & \multicolumn{1}{c|}{10 Crops}                       & \multicolumn{1}{c|}{10} & 22.35 & \multicolumn{1}{c|}{35.64}          & 22.29     & 35.50              \\ \cline{2-7} 
\multicolumn{1}{c|}{}                           & \multicolumn{1}{c|}{\multirow{3}{*}{Single}}        & \multicolumn{1}{c|}{1}  & 23.48 & \multicolumn{1}{c|}{35.69}          &           &                    \\
\multicolumn{1}{c|}{}                           & \multicolumn{1}{c|}{}                               & \multicolumn{1}{c|}{2}  & 22.13 & \multicolumn{1}{c|}{34.78}          & 22.06     & 34.42              \\
\multicolumn{1}{c|}{}                           & \multicolumn{1}{c|}{}                               & \multicolumn{1}{c|}{3}  & 21.87 & \multicolumn{1}{c|}{34.87}          & 21.85     & 34.36              \\ \cline{2-7} 
\multicolumn{1}{c|}{}                           & \multicolumn{1}{c|}{\multirow{3}{*}{Cyclic (Ours)}} & \multicolumn{1}{c|}{1}  & 23.49 & \multicolumn{1}{c|}{34.86} &           &                    \\
\multicolumn{1}{c|}{}                           & \multicolumn{1}{c|}{}                               & \multicolumn{1}{c|}{2}  & 22.10 & \multicolumn{1}{c|}{34.21}          & 22.12     & 34.08              \\
\multicolumn{1}{c|}{}                           & \multicolumn{1}{c|}{}                               & \multicolumn{1}{c|}{3}  & 21.93 & \multicolumn{1}{c|}{34.45}          & 21.87     & 34.02              \\ \hline

\end{tabular}
\rule{126mm}{1pt}%

\caption{\label{table:performance} Performance comparison of the previous methods with the proposed method on CIFAR100 and CIFAR100-C. Note that $mCE$ metric equation for CIFAR100-C is different from that of ImageNet-C.  $mCE$ for CIFAR100-C is not measured relative to the performance of AlexNet, but it measures the average of accuracies of all the corrupted versions of the dataset.}
\end{table*}

\newpage

\begin{table*}[!h!]
\centering
\rule{70mm}{1pt}\\%
\def\arraystretch{1.3}

\begin{tabular}{cc|cc}
    \centering
    TTA Method    & Iteration & Clean  & $mCE$  \\
    \hline
    Center Crop          & 0                & 22.39 & 65.07 \\
    Oracle-TTA    & 1                & 15.99 & 53.28 \\
    \hline
    \multirow{2}{*}{Cyclic Oracle} & 2                & 13.06 & 47.20 \\
                  & 3                & 11.68 & 44.02


\end{tabular}

\centering
\rule{70mm}{1pt}%
\caption{\label{table:oracle} Performance using a hypothetically perfect loss predictor named Oracle-TTA. ResNet-50 trained with the AugMix data augmentation method was used as the target network. ``Iteration'' refers to the number of loss predictions performed on each image. Clean and $mCE$ refer to the error rate on the ImageNet dataset and the mean corruption error on the ImageNet-C dataset, respectively. Lower values represent better performance. The correlation between ``Iteration'' and the performance indicates that, when suitable transformations are selected, more opportunities for transformation allow the input image to be better prepared for the target network, which improves the upper bound of the general performance.
}
\end{table*}

\newpage

\begin{figure*}[!h!]
    \centering
    \includegraphics[scale=0.29]{images/EWM_Example_1.PNG}
    \caption{ \label{figure:EWM1} Demonstration of EWM on two images from ImageNet-C with the 10 crops TTA policy. The numerical values below each image refer to the relative entropy calculated by the target network. Red borders mark the patches with high entropy (uncertainty). \textbf{Top:} An image of dogs, divided into 10 patches. The red-bordered patches show a dog with part of the central dog's head excluded. \textbf{Bottom:} An image of a man, divided into 10 patches. The red-bordered patches are missing part of the facial features, leaving relatively high uncertainty.}
\end{figure*}


\newpage

\begin{figure*}[!h!]
    \centering
    \includegraphics[scale=0.5]{images/EWM_Example_2.PNG}
    \caption{ \label{figure:EWM2} Demonstration of EWM on two images from ImageNet-C with the baseline loss prediction TTA policy \citep{TTA-Policy_L2T} with a single iteration of transformation prediction. Three augmentations were used as a hyperparameter, resulting in 3 input images in total. The numerical value below each image refers to the relative entropy calculated by the target network. \textbf{Top:} With symmetrical padding, the zoom-out transformation presents a more coherent view of the bird's facial structure $(i=3)$, which was slightly cut off by the border of the image. \textbf{Bottom:} An image of a yawl is also aided by the zoom-out transformation $(i=1)$; the padding creates a fake reflection of the yawl on the water, resulting in lower entropy (uncertainty). The set of predefined transformations from the previous method was used.}
\end{figure*}

\newpage

\begin{figure*}[!h!]
    \centering
    \includegraphics[scale=0.55]{images/EWM_Example_3.PNG}
    \caption{ \label{figure:EWM3} Demonstration of EWM on an image from ImageNet-C with our cyclic TTA method. Three augmentations were used as a hyperparameter, resulting in 3 input images in total. Each transformation attempts to remove the Gaussian noise by blurring and zooming out. The entropy values of the images are relatively uniform.}
\end{figure*}


\newpage
\newpage

\bibliography{chun_45}


\end{document}
