% \documentclass{uai2023} % for initial submission
\documentclass[accepted]{uai2023} % after acceptance, for a revised
                                    % version; also before submission to
                                    % see how the non-anonymous paper
                                    % would look like

%% There is a class option to choose the math font
% \documentclass[mathfont=ptmx]{uai2023} % ptmx math instead of Computer
% Modern (has noticeable issues)
% \documentclass[mathfont=newtx]{uai2023} % newtx fonts (improves upon
 % ptmx; less tested, no support)
% NOTE: Only keep *one* line above as appropriate, as it will be replaced
%       automatically for papers to be published. Do not make any other
%       change above this note for an accepted version.

%% Choose your variant of English; be consistent
\usepackage[american]{babel}
% \usepackage[british]{babel}

%% Some suggested packages, as needed:
\usepackage{natbib} % has a nice set of citation styles and commands
    \bibliographystyle{plainnat}
    \renewcommand{\bibsection}{\subsubsection*{References}}
\usepackage{mathtools} % amsmath with fixes and additions
% \usepackage{siunitx} % for proper typesetting of numbers and units
\usepackage{booktabs} % commands to create good-looking tables
\usepackage{tikz} % nice language for creating drawings and diagrams

% for cross referencing the main text
% PLEASE ONLY USE xr IN THE SUPPLEMENTARY MATERIAL. 
% In the main paper, hard code any cross-reference to the supplementary material. 
\usepackage{xr} 

%% Additional packages needed by the supplement.
%% babel, natbib (with its \bibliographystyle / \bibsection setup), mathtools,
%% booktabs and tikz are already loaded earlier in this preamble. Repeating
%% them is redundant, and a second \bibliographystyle writes a second
%% \bibstyle line to the .aux file, which BibTeX reports as an error.
\usepackage{graphicx} % provides \scalebox, used by the results table
\usepackage{amsmath}
\usepackage{amssymb}
\usepackage{multirow} % provides \multirow, used by the results table
% \usepackage[pagebackref,breaklinks,colorlinks]{hyperref}

\externaldocument{uai2023-template}

%% Provided macros
% \smaller: Because the class footnote size is essentially LaTeX's \small,
%           redefining \footnotesize, we provide the original \footnotesize
%           using this macro.
%           (Use only sparingly, e.g., in drawings, as it is quite small.)

%% Self-defined macros
% \swap[<sep>]{<a>}{<b>} expands to <b><sep><a>; the optional <sep> defaults to `-'.
\newcommand{\swap}[3][-]{#3#1#2} % just an example

\title{Two Sides of Mis-Calibration: Identifying Over and Under-Confidence Prediction for Network Calibration\\(Supplementary Material)}

% The standard author block has changed for UAI 2023 to provide
% more space for long author lists and allow for complex affiliations
%
% All author information is automatically removed by the class for the
% anonymous submission version of your paper, so you can already add your
% information below.
%
% Add authors
% \author[1]{\href{mailto:<jj@example.edu>?Subject=Your UAI 2023 paper}{Jane~J.~von~O'L\'opez}{}}
% \author[1]{Harry~Q.~Bovik}
% \author[1,2]{Further~Coauthor}
% \author[3]{Further~Coauthor}
% \author[1]{Further~Coauthor}
% \author[3]{Further~Coauthor}
% \author[3,1]{Further~Coauthor}
% % Add affiliations after the authors
% \affil[1]{%
%     Computer Science Dept.\\
%     Cranberry University\\
%     Pittsburgh, Pennsylvania, USA
% }
% \affil[2]{%
%     Second Affiliation\\
%     Address\\
%     …
% }
% \affil[3]{%
%     Another Affiliation\\
%     Address\\
%     …
%   }

% Authors. The optional argument is the affiliation mark attached to each name.
% The mailto target is the bare address: the angle brackets in the template
% line only mark a placeholder and are not part of the URI.
\author[1]{\href{mailto:shuang.ao@open.ac.uk?Subject=Your UAI 2023 paper}{Shuang Ao}{}}
\author[2]{Stefan Rueger}
\author[3]{Advaith Siddharthan}
% Add affiliations after the authors
% All three marks point to the same affiliation.
\affil[1,2,3]{%
    Knowledge Media Institute\\
    The Open University\\
    Milton Keynes, UK
}
  % Number tables in this file as A1, A2, ...
  \renewcommand{\thetable}{A\arabic{table}}
  \begin{document}
  
\onecolumn %% Switches to a single-column layout; remove this line to keep the class's default column layout
\maketitle

 

% Fig. \ref{fig:pitt} and Eq \ref{eq:example} in the main paper can be cross referenced using \texttt{xr}. 

\appendix
\section{Identifying Under-confidence}
\label{supp_oc_uc}
% Table~\ref{tab:supp-data} lists additional simulation results; see also \citet{einstein} for a comparison. 

% Results table: per-dataset, per-model UC/OC mis-calibration scores for the
% baseline, TS and cwMCS TS. Each cell is "score (percentage of classes)".
% The column specification carries all vertical rules, so data cells need no
% per-cell \multicolumn wrappers; \multicolumn is kept only for header cells
% that span columns or need a different alignment.
\begin{table*}[!ht]
\centering
\caption{Average under-confidence mis-calibration score (UC MCS) and average over-confidence mis-calibration score (OC MCS) for the baseline, TS, and our proposed method cwMCS TS. All results are shown as percentages for clarity. The best results in each row are shown in bold. The value in parentheses is the percentage of classes that are under- or over-confident. IN, Tiny-IN, C100 and C10 denote ImageNet, Tiny-ImageNet, CIFAR100 and CIFAR10, respectively.}
\label{tab:u_o msc}
% The `%' after the opening brace and after \end{tabular} stops the line
% breaks from being typeset as spaces inside the scaled box.
\scalebox{.9}{%
\begin{tabular}{l|c|c|c|c|c|c|c}
\hline
\multicolumn{2}{l|}{}        &              & \multicolumn{2}{c|}{Baseline} & \multicolumn{2}{c|}{TS} & \multicolumn{2}{c}{cwMCS TS} \\ \hline
\multicolumn{1}{c|}{Dataset} & Model        & UC MCS (\%)          & OC MCS (\%)         & UC MCS (\%)         & OC MCS (\%)         & UC MCS (\%)          & OC MCS (\%)         \\ \hline
\multirow{13}{*}{IN}         & ViT          & -4.2 (48.3)          & 7.4 (51.7)          & -5.2 (61.3)         & 7.2 (38.7)          & \textbf{-3.7 (58.3)} & \textbf{0.5 (41.7)} \\ \cline{2-8}
                             & SwinT        & -11.6 (85.8)         & 9.2 (14.2)          & -5.3 (64)           & 9.2 (36)            & \textbf{-0.5 (65.7)} & \textbf{8.7 (34.3)} \\ \cline{2-8}
                             & DeiT         & -10.4 (79.6)         & 9.0 (20.4)          & -5.2 (54.7)         & 9.6 (45.3)          & \textbf{-3.2 (55.3)} & \textbf{5.6 (44.7)} \\ \cline{2-8}
                             & CaiT         & -6.7 (67.6)          & 9.8 (32.4)          & -5.2 (57.6)         & 9.6 (42.4)          & \textbf{-3.2 (58.2)} & \textbf{7.6 (41.8)} \\ \cline{2-8}
                             & BeiT         & -9.0 (81.9)          & 8.9 (18.1)          & -5.4 (65.3)         & 8.5 (34.7)          & \textbf{-4.1 (62.2)} & \textbf{8.2 (37.8)} \\ \cline{2-8}
                             & CoaT         & -10.9 (83.1)         & 8.7 (16.9)          & -6.8 (55.6)         & 9.0 (44.4)          & \textbf{-5.4 (57.8)} & \textbf{8.0 (42.2)} \\ \cline{2-8}
                             & CrossViT     & -9.6 (76.8)          & 9.5 (23.2)          & -5.5 (56.1)         & 9.6 (43.9)          & \textbf{-3.5 (56.1)} & \textbf{8.6 (43.9)} \\ \cline{2-8}
                             & ConvMix      & -19.0 (90.4)         & \textbf{8.1 (9.6)}  & -8.9 (61.5)         & 8.6 (38.5)          & \textbf{-5.7 (59.7)} & 8.4 (40.3)          \\ \cline{2-8}
                             & ConvNext     & -5.9 (59.8)          & 9.3 (40.2)          & -5.3 (53.8)         & 9.2 (46.2)          & \textbf{-4.3 (51.6)} & \textbf{9.0 (48.4)} \\ \cline{2-8}
                             & ResNet34     & \textbf{-4.8 (40.1)} & 9.9 (59.9)          & -6.4 (53.8)         & 8.6 (46.2)          & -6.3 (52)            & \textbf{8.5 (48)}   \\ \cline{2-8}
                             & DenseNet121  & \textbf{-5.2 (43.7)} & 9.3 (56.3)          & -6.3 (55.4)         & \textbf{8.4 (44.6)} & -6.4 (55.2)          & \textbf{8.4 (44.8)} \\ \cline{2-8}
                             & VGG16        & \textbf{-5.3 (40.5)} & 8.8 (59.5)          & -6.3 (53.9)         & 8.1 (46.1)          & -6.2 (54)            & \textbf{8.0 (46)}   \\ \cline{2-8}
                             & EfficientNet & -17.0 (90.3)         & 8.4 (9.7)           & -16.0 (87.4)        & 8.2 (12.6)          & \textbf{-8.8 (53.6)} & \textbf{1.2 (46.4)} \\ \hline
\multirow{3}{*}{Tiny-IN}     & ResNet34     & \textbf{-3.1 (11.5)} & 10.6 (88.5)         & -5.8 (58)           & 6.3 (42)            & -5.4 (52)            & \textbf{4.3 (48)}   \\ \cline{2-8}
% NOTE(review): in the next row the baseline UC MCS (-1.0) is closer to zero than the bolded cwMCS TS value (-5.2); confirm which entry should be bold.
                             & DenseNet121  & -1.0 (2.5)           & 13.4 (97.5)         & -5.5 (58.5)         & 7.6 (41.5)          & \textbf{-5.2 (56.5)} & \textbf{6.6 (43.5)} \\ \cline{2-8}
                             & VGG16        & \textbf{-0.8 (1.5)}  & 15.9 (98.5)         & -5.2 (57.5)         & 5.1 (42.5)          & -4.9 (52.5)          & \textbf{3.1 (47.5)} \\ \hline
\multirow{3}{*}{C100}        & ResNet34     & \textbf{-0.1 (5)}    & 13.4 (95)           & -5.2 (61)           & 6.2 (39)            & -5.0 (57)            & \textbf{4.2 (43)}   \\ \cline{2-8}
% NOTE(review): the TS percentages in the next row sum to 101 (55 + 46); confirm against the raw results.
                             & DenseNet121  & \textbf{-1.3 (1)}    & 15 (99)             & -5.7 (55)           & 6.3 (46)            & -4.3 (58)            & \textbf{5.6 (42)}   \\ \cline{2-8}
                             & VGG16        & \textbf{-1.3 (3)}    & 10.3 (97)           & -4.4 (56)           & 8.7 (44)            & -4.4 (56)            & \textbf{3.7 (44)}   \\ \hline
\multirow{3}{*}{C10}         & ResNet34     & 0.0 (0)              & 4.1 (100)           & \textbf{-1.4 (30)}  & 2.1 (70)            & -1.5 (30)            & \textbf{1.5 (70)}   \\ \cline{2-8}
                             & DenseNet121  & 0.0 (0)              & 11.3 (100)          & -1.6 (40)           & 5.4 (60)            & \textbf{-0.2 (30)}   & \textbf{5.2 (70)}   \\ \cline{2-8}
                             & VGG16        & 0.0 (0)              & 7.1 (100)           & -0.9 (40)           & 2.8 (60)            & \textbf{-0.7 (50)}   & \textbf{0.2 (50)}   \\ \hline
\end{tabular}%
}
\end{table*}

Table~\ref{tab:u_o msc} reports the mean under-confidence and mean over-confidence scores, together with the percentage of classes in each confidence status. For the ImageNet dataset with transformer variants, most of the classes are under-confident in the baselines, and the absolute value of the mean under-confidence score is generally higher than the mean over-confidence score. The model with the highest percentage of under-confident classes is ConvMix, where only about 10 percent of classes are over-confident. For CNNs, over- and under-confident classes are more balanced. Surprisingly, EfficientNet behaves similarly to ConvMix: its percentage of under-confident classes is much higher than that of over-confident ones. After applying TS, the percentages of over- and under-confident classes are more balanced, and our proposed method cwMCS TS maintains this trend. Compared to the baseline, our cwMCS TS method almost halves the over- and under-confidence scores, whereas TS changes them only slightly. For the Tiny-ImageNet, CIFAR100 and CIFAR10 datasets with CNNs, roughly 90 percent or more of the classes are over-confident in the baselines, and none of the classes is under-confident for the CIFAR10 dataset. However, more than half of the classes become under-confident after applying TS, indicating that TS can over-calibrate models. Our proposed method cwMCS TS significantly improves the mean over-confidence score and contributes to better calibration for under-confident classes.


% \bibliography{uai2023-template}

\end{document}
