\documentclass{midl} % Include author names

% The following packages will be automatically loaded:
% jmlr, amsmath, amssymb, natbib, graphicx, url, algorithm2e
% ifoddpage, relsize and probably more
% make sure they are installed with your latex distribution

\usepackage{mwe} % to get dummy images
\jmlrvolume{-- nnn}
\jmlryear{2024}
\jmlrworkshop{Full Paper -- MIDL 2024}
%\editors{Under Review for MIDL 2024}
\editors{Accepted for publication at MIDL 2024}
\title[ASMR]{ASMR: Angular Support for Malfunctioning Client Resilience in Federated Learning}

 % Use \Name{Author Name} to specify the name.
 % If the surname contains spaces, enclose the surname
 % in braces, e.g. \Name{John {Smith Jones}} similarly
 % if the name has a "von" part, e.g \Name{Jane {de Winter}}.
 % If the first letter in the forenames is a diacritic
 % enclose the diacritic in braces, e.g. \Name{{\'E}louise Smith}

 % Two authors with the same address
 % \midlauthor{\Name{Author Name1} \Email{abc@sample.edu}\and
 %  \Name{Author Name2} \Email{xyz@sample.edu}\\
 %  \addr Address}

 % Three or more authors with the same address:
 % \midlauthor{\Name{Author Name1} \Email{an1@sample.edu}\\
 %  \Name{Author Name2} \Email{an2@sample.edu}\\
 %  \Name{Author Name3} \Email{an3@sample.edu}\\
 %  \addr Address}


% Authors with different addresses:
% \midlauthor{\Name{Author Name1} \Email{abc@sample.edu}\\
% \addr Address 1
% \AND
% \Name{Author Name2} \Email{xyz@sample.edu}\\
% \addr Address 2
% }

%\footnotetext[1]{Contributed equally}

% More complicate cases, e.g. with dual affiliations and joint authorship
\midlauthor{\Name{Mirko Konstantin\nametag{$^{1}$}} \Email{mirko.konstantin@gris.tu-darmstadt.de}\\
\addr $^{1}$ Technical University Darmstadt \\
\Name{Moritz Fuchs\nametag{$^{1}$}} \Email{moritz.fuchs@gris.tu-darmstadt.de}\\
\Name{Anirban Mukhopadhyay\nametag{$^{1}$}} \Email{anirban.mukhopadhyay@gris.tu-darmstadt.de}
}

\begin{document}

\maketitle

\begin{abstract}
Federated Learning (FL) allows the training of deep neural networks in a distributed and privacy-preserving manner. However, this concept suffers from malfunctioning updates sent by the attending clients that cause global model performance degradation. Reasons for this malfunctioning might be technical issues, disadvantageous training data, or malicious attacks.
Most of the current defense mechanisms require impractical prerequisites, such as knowledge about the number of malfunctioning updates, which makes them unsuitable for real-world applications.
To counteract these problems, we introduce a novel method called Angular Support for Malfunctioning Client Resilience (ASMR), that dynamically excludes malfunctioning clients based on their angular distance.
%Furthermore, a new defense mechanism is introduced that exploits the concepts of clients support to detect the malicious ones. 
Our novel method does not require any hyperparameters or knowledge about the number of malfunctioning clients.
Our experiments showcase the detection capabilities of ASMR in an image classification task on a histopathological dataset, while also presenting findings on the significance of dynamically adapting decision boundaries.
\end{abstract}

\begin{keywords}
Federated Learning, Outlier Detection
\end{keywords}

\section{Introduction}
\label{sec:intro}
%% Motivation Federated Learning in medical imaging
Federated Learning (FL) has become an emerging research topic in the last few years \cite{mammen2021federated}. Due to the concept of training models in a distributed manner, FL comes with a bunch of applications in various fields \cite{yang2019federated}, such as medical imaging \cite{rieke2020future}. 
Apart from dealing with data regulations \cite{TRUONG2021102402}, access to annotated and heterogeneous data is a major challenge in medical imaging \cite{willemink2020preparing}, as model robustness depends on it. FL addresses the challenge of extending training data across multiple institutions \cite{guo2021multi}, mitigating the need for extensive annotations per institution and thereby reducing the associated costs \cite{tajbakhsh2021guest}.

However, there is \textbf{no guarantee about the utility of the model updates} sent by the participating clients~\cite{ma2021federated}. 
Local models trained under unfavorable conditions can negatively affect the aggregated global model~\cite{wagner2022federated}. 
Updates leading to a degradation in global model performance are termed malfunctioning and can be categorized into two distinct categories, as demonstrated in Figure \ref{fig:problem}. 
As demonstrated by \cite{9308910}, clients may exhibit \textbf{malicious} behavior to intentionally corrupt the global model performance by tampering with the updates before transmission. Furthermore, 
\textbf{unreliable} clients, as indicated by \cite{8713350}, may unintentionally send malfunctioning updates, facing challenges like broken devices, transmission errors, or faulty image acquisition \cite{kanwal2022devil}. The unpredictable nature of these issues renders knowledge about the precise number of malfunctioning updates impractical. Corruptions do not necessarily mean that the global model diverges immediately, but have a significant impact on model performance in the long term. This makes malfunctioning updates hard to detect without knowledge about the baseline performance \cite{shejwalkar2022back}. 
To overcome this challenge, an algorithmic solution is required. In the past, several works have been published to detect and exclude malfunctioning updates from aggregation~\cite{blanchard2017machine, shejwalkar2021manipulating, sattler2020clustered, 9650669}. 
These approaches frequently entail \textbf{impractical prerequisites}. Certain methodologies necessitate access to a publicly available dataset \cite{li2020learning} for generating reference updates used in training a classification model designed to identify malfunctioning updates. Conversely, others \textit{require knowledge about the constant number of malfunctioning updates} per round \cite{blanchard2017machine, shejwalkar2021manipulating}. Recognizing these research gaps, we introduce the novel concept of \textbf{angular client support}. This concept allows us to propose \textbf{an out-of-the-box solution} called Angular Support for Malfunctioning Client Resilience (ASMR) that can reliably detect malfunctioning updates. The \textbf{number of excluded updates is adapted dynamically} each round, \textbf{without required knowledge} about the population of malfunctioning clients. Furthermore, knowledge about the test data or the evaluation protocol is not required. The principle of \textbf{angular client support} introduces a novel perspective emphasizing the interconnectedness between clients based on their nature. 

\begin{figure}[h]
    \centering
    \includegraphics[width=\textwidth]{figures/problem.png}
    \caption{The leftmost FL system exemplifies an ideal scenario, showcasing optimal performance. In the middle system, an unreliable client grapples with technical issues, resulting in the degradation of the global model performance due to its updates. Meanwhile, the system on the right features a malicious client intentionally corrupting its updates.
}
    \label{fig:problem}
\end{figure}

 This concept underscores the importance of fostering collaborative relationships among benign clients to enhance collective resilience against malfunctioning counterparts. The closer clients are in angular distance, the more robust their collaborative support becomes.
In this work, we assume the malfunctioning updates to be independent. 
In particular, \textit{malicious clients do not exchange knowledge} with each other to improve the effectiveness of their attacks. Furthermore, we assume that malicious clients do not have knowledge about the other clients in terms of the number of malicious, unreliable, or benign clients. This implies that even a majority of malfunctioning clients does not mean that malfunctioning updates support each other. In this case, a \textbf{supportive minority of benign updates} enables ASMR to \textbf{detect malfunctioning updates} and maintain a \textbf{steady convergence} during training.
To demonstrate the detection capabilities of ASMR, we applied it to three cases. 
First, we consider the case of  \textbf{malicious clients}. We selected a subset of clients that perform untargeted attacks, such as \textbf{Additive Noise Attacks (ANA)} and \textbf{Sign Flipping Attacks (SFA)}. 
Second, we apply ASMR to the case of  \textbf{unreliable clients}. The selected subset of clients train their local model on data, for which we simulate device failure and acquisition errors by augmenting the data with pathology-specific artifacts, that significantly degrade the accuracy.  
Ultimately, the combination of both previous cases is considered. Each client in the selected group is either malicious or unreliable. 
%%% Comparisons in the experiments
Our novel approach is compared to three other state-of-the-art (SOTA) detection algorithms on an image classification task in the field of digital pathology.
\textbf{The code is available at: \url{https://github.com/MECLabTUDA/ASMR}}. 






\section{Background}

Federated Learning became a serious asset in medical imaging in recent years \cite{rieke2020future} \cite{ng2021federated} \cite{nguyen2022federated}. Locally trained models are sent to a central server and aggregated with algorithms like FedAvg \cite{khan2021federated} \cite{ye2020edgefed} or FedAvgM \cite{hsu2019measuring}.
However, federated learning systems are vulnerable to malicious clients that perform attacks to corrupt the global model performance. 






\textbf{Malicious Client Detection:}
To overcome this vulnerability against the aforementioned attacks, defense mechanisms were introduced. 
In 2017, \cite{blanchard2017machine} proposed Multi-Krum (MKrum). MKrum chooses the updates that minimize the squared distance to their nearest neighbors. However, this technique requires an estimated number of malicious clients. Later, \cite{shejwalkar2021manipulating} proposed Divide-and-Conquer (DnC), which deals with determining principal components and computing the projections of the updates. Afterward, the updates with the largest projections are excluded from aggregation. Like MKrum, DnC requires the number of malicious clients. Meanwhile, \cite{sattler2020clustered} implemented a clustering approach known as CFL, aimed at uncovering hidden clustering among clients in FL systems to differentiate between benign and malicious clients. Due to its inherent clustering mechanism, CFL does not necessitate prior knowledge about the number of malicious clients. Likewise, \cite{li2021detection} utilize k-means clustering for the detection of malfunctioning clients, which, however, exhibits less resilience to noise and is incapable of effectively handling clusters of varying sizes according to \cite{sisodia2012clustering}. Approaches like ShieldFL \cite{ma2022shieldfl} or SFAP \cite{ma2021pocket} rely on access to training data, stored by the server. Nonetheless, in the domain of medical imaging, centralized data storage is often unfeasible. FLTrust, as proposed by \cite{cao2020fltrust}, operates under the assumption of a trusted round phase devoid of malfunctioning clients. This assumption may not always hold, e.g., if certain clients have inherently flawed data acquisition processes from the beginning.





%\textit{In the following, we propose a method that does not require any hyperparameters like number of malicious clients or considered neighbors.}


\textbf{Histopathology:}
Computational Histopathology has the promise of alleviating the workload of pathologists and accelerating the process of delivering accurate diagnosis and prognosis to patients~\cite{couture2018image,griem2023artificial}. For this process to be applicable, tissues require tissue fixation, processing, cutting, staining, and digitization, which are subject to many different kinds of artifacts~\cite{kanwal2022devil}. 
Even though the heterogeneity in tumor tissues can still be limiting, the heterogeneity intensifies across multiple institutions, thereby underscoring the value of federated learning~\cite{wagner2022federated}.
Respectively, a centralized system, holding just the local data, might not generalize and be robust to different stainings and artifacts~\cite{faryna2021tailoring}, even when they are synthetic~\cite{babendererde2023jointly}, as these cause \textit{silent failures} in many models. %In our work, we deploy synthetic artifacts from~\cite{stieber2022froodo} to simulate such malfunctions across different institutions and clients.

\section{Methodology}

In this paper, we introduce the concept of \textbf{angular client support} for malfunctioning client detection in federated learning systems. Based on this concept, a new detection technique, called ASMR, is proposed that aims to protect the system against malfunctioning updates.
Moreover, ASMR establishes a dynamic decision boundary, selectively excluding updates from aggregation, thereby eliminating the requirement for additional hyperparameters. 
This leads to automatic adaption in terms of a changing number of malicious clients.

\begin{figure}[h]
    \centering
    \includegraphics[width=\textwidth]{figures/augmentations.png}
    \caption{The first image shows an image without artifacts, while the others demonstrate the artifacts that are used in this work.}
    \label{fig:artifacts}
\end{figure}


\textbf{Malfunctioning Updates:}
Malicious clients that aim to intentionally degrade the global model performance may corrupt their updates before sending them to the server. Such attacks are called untargeted attacks and can go undetected for a long time~\cite{shejwalkar2022back}. 
To achieve the attackers' goal, they may change the labels of their local training data to make the model inaccurate \cite{bhagoji2019analyzing}, or they send a random update, which is not aligned to the task at all \cite{fang2020local}. 
The two common baseline attacks that are considered in this work are the ANA \cite{li2019rsa} \cite{wu2020federated} and SFA \cite{li2019rsa} \cite{wu2020federated}.
In our implementation, for the ANA, we introduced noise resulting in a global model performance decrease ranging from 20\% to 30\%. For the SFA, we multiplied the update by a negative constant to invert the direction of the gradients.
The incorporation of even a minimal number of SFAs has the potential to induce divergence in the global model. This renders SFAs unforgiving towards uncertainties in detection, yet they are anticipated to be more easily detected.
The third type of malfunctioning we consider is pathology-specific artifacts, which we simulate by the FrOoDo \cite{stieber2022froodo} framework adding artifacts to local training data.
Figure \ref{fig:artifacts} demonstrates the selected artifacts, e.g. blood cells that cover the tissues or grease spots on the probes.
The artifacts were chosen such that the global model performance decreases by 50\%--60\%.
Table~\ref{tab:updates} in Appendix~\ref{sec:details} outlines the details of the malfunctioning updates to provide a comprehensive overview.
%%% Describe that the artifacts are used as attacks



\begin{figure}[h]
    \centering
    \includegraphics[width=\textwidth]{figures/new_asmr_1.png}
    \caption{This figure demonstrates the idea behind client support. The clients labeled with M hold malfunctioning updates, while the others denoted with B hold benign updates. The angles between the update, denoted with the blue arrow, to all other updates are visualized. The green angles are the supporting ones.}
    \label{fig:clients}
\end{figure}

\textbf{Angular Client Support:}
The notion of angular client support describes how clients are connected based on their angles, as depicted in Figure \ref{fig:clients}.
The close angular proximity among benign clients signifies mutual support, contrasting with malfunctioning clients that lack such support. Additionally, it is noteworthy that smaller angles between benign updates indicate stronger support among them. 
This concept draws inspiration from the research of \cite{geiping2020inverting}, who asserted that the angle between gradients conveys information about the prediction change. Hence, we posit that the angular distance of a malfunctioning update must be significantly distant from benign ones in order to exert a detrimental impact on the global model performance.



\textbf{ASMR:}
This method identifies malfunctioning clients based on their \textbf{local model parameters}. Initially, all received updates undergo normalization by dividing the vectors by their magnitude. Subsequently, the pairwise cosine distance ($cosDist$) between local model parameters is computed.
Then the outlier factor ($OF$) is determined for each update, inspired by the local outlier factor of \cite{breunig2000lof}.
The reachability density ($rd(\bullet)$ in Eq.~\ref{rd}) is computed for each update, representing the inverse of the average $cosDist$, and taking into consideration \textbf{all} updates.


\begin{equation}\label{rd}
    rd(p) = 1/(\frac{ \sum_{o \in N(p)} cosDist(p,o)} {|N(p)|})
\end{equation}

where $N(p)$ is the set of all clients except $p$, and $|\bullet|$ defines the cardinality of the set $\bullet$.

Using the reachability density ($rd(\bullet)$), the outlier factor is determined by:

\begin{equation} \label{OF}
    OF(p) = \frac{\sum_{o \in N(p)} \frac{rd(o)}{rd(p)}}{|N(p)|}
\end{equation}

Subsequently, the updates undergo ordering based on their outlier factors, following which the decision boundary is established as the most substantial gap between two successive updates. The subset of updates exhibiting higher outlier factors is consequently identified as malfunctioning and is thereby excluded from the pool. Finally, any aggregation algorithm of choice can be applied to the set of benign updates. 
This characteristic has the potential to hold even in scenarios of a majority of malfunctioning clients that support each other less than the benign ones.
As long as benign clients maintain superior support, ASMR is anticipated to exhibit robust protective capabilities, encompassing precise detection and automated adjustment to the prevalence of malfunctioning updates.

\section{Experiments}
%%% Explanation about artifats and untargeted attacks and the combination of both
%%% Intro to this section 
%%% Better description of t

In this section, we commence by presenting the dataset, metrics, and training particulars employed in this study. Subsequently, we conduct a comprehensive evaluation of ASMR, juxtaposed with three alternative methods.



\textbf{Dataset:}
%% Landing sentence
We used one dataset of the histopathology domain to evaluate our approach.
The colorectal cancer dataset (CRC) \cite{kather2018100} contains 100,000 images with a resolution of $224 \times 224$ extracted from 86 human cancer tissue slides. The corresponding classification task covers nine different classes. Those are adipose (ADI), background (BACK), debris (DEB), lymphocytes (LYM), mucus (MUC), smooth muscle (MUS), normal colon mucosa (NORM), cancer-associated stroma (STR), and colorectal adenocarcinoma epithelium (TUM).
We applied a random 0.7 / 0.3 train test split. Each client gets an equally sized portion of the training data. 
For this classification task, a Resnet50 \cite{7780459} architecture was chosen, with pretrained weights from ImageNet \cite{deng2009imagenet}. 


\textbf{Evaluation:}
%%%% Metrics
In our analysis, the False Positive Rate (FPR) and True Positive Rate (TPR) were utilized to gauge the efficacy of malfunctioning update detection. Ultimately, we analyze the final test accuracy of the global model after twelve training rounds. The metrics are assessed over ten seeds for experiments with a fixed number of clients and five seeds for scenarios involving a dynamically changing number of clients.




\textbf{Training details:}
In this work, we consider a scenario of ten clients, where a subset of clients is selected to send malfunctioning updates. 
In our experimental setup, clients transmit their local model parameters as updates. The server utilizes FedAvg for aggregation to derive the global model. Prior to sending an update to the server, each client undergoes training for one local epoch.
To illustrate the serious adverse effects of three malfunctioning clients, we include Figure~\ref{fig:motivation} in Appendix~\ref{sec:motivation}.
It is expected that the severity of the malfunctioning updates correlates with the difficulty of detecting them. Therefore, SFAs are expected to be easier to detect than ANAs. To prevent the global model from negative effects, detection algorithms are applied to the system. 
ASMR is compared to MKrum \cite{blanchard2017machine}, DnC \cite{shejwalkar2021manipulating}, and CFL \cite{sattler2020clustered} to demonstrate the effectiveness. As mentioned earlier, DnC and MKrum require a parameter that specifies how many updates should be excluded from each round. For this scenario, we set the parameter to three, such that MKrum and DnC match their optimal prerequisites. 
Consequently, we establish a predetermined count of three malicious clients for
our evaluations and set the number of excluded clients in MKrum and DnC to three. To
underscore the significance of a dynamically adapting decision boundary, we explored one
more scenario involving a variable count of malfunctioning clients. Specifically, we set the
malfunctioning clients to four, each sending a malfunctioning update with a 75\% probability. Notably, MKrum and DnC persistently exclude three clients per round, maintaining
prior knowledge of the expected number of malfunctioning clients. We aim to show that
ASMR is robust against this scenario. We put a table with these details in Appendix \ref{sec:details}. The experimental framework is developed using PyTorch \cite{paszke2019pytorch}, and all experiments were carried out on \textit{Nvidia A100} GPUs.



\subsection{Results}
In our experiments, our objective is to demonstrate that ASMR attains comparable results, even if MKrum and DnC operate in their comfort zone, relying on the \textbf{impractical} knowledge about the number of malfunctioning clients. Additionally, our goal is to demonstrate that ASMR surpasses CFL, a method that also autonomously establishes the decision boundary.


\begin{figure}[h]

    \centering

    \includegraphics[width=\textwidth]{figures/new_malicious.png}
    \caption{The left plots depict a fixed number of malicious updates, while the right ones illustrate a dynamically changing count. Bar plots represent TPRs in the positive direction and FPRs in the negative. The distribution of the final global model performance is shown through a boxplot.}
    \label{fig:malicious_results}

\end{figure}

\textbf{Malicious Clients - Untargeted Attacks:}
This experiment investigates malicious clients in the context of two distinct attacks: ANA and SFA. Each attack is examined independently, and the outcomes are visually presented in Figure \ref{fig:malicious_results}. We present comprehensive results in a table in the Appendix \ref{sec:app_results}. ASMR demonstrates superior performance in detecting ANA compared to CFL and DnC, even when the number of ANA per round is fixed. Notably, our method exhibits robustness across both scenarios, seamlessly adapting to dynamic changes in the number of malicious clients without performance degradation. 

\textbf{Unreliable Clients - Pathology Specific Data Artifacts:}
In the second scenario, we analyze the impact of unreliable clients that train on data containing artifacts. The results are visualized in Figure \ref{fig:unreliable results} and comprehensive results are provided in a table in the Appendix \ref{sec:app_results}.
Notably, only our method and CFL exhibit an effective detection rate in both scenarios. The absence of detection of MKrum and DnC in the case of a dynamically changing number of unreliable clients significantly impairs the global model performance.

\begin{figure}[h]

    \centering

    \includegraphics[width=\textwidth]{figures/new_artifacts.png}
    \caption{The left plots depict a fixed number of malfunctioning updates, while the right ones illustrate a dynamically changing count. Bar plots represent TPRs in the positive direction and FPRs in the negative. The distribution of the final global model performance is shown through a boxplot.}
    \label{fig:unreliable results}

\end{figure}

\textbf{Malfunctioning Clients - Untargeted Attacks combined with Artifacts:}
In the final case, we assessed the general scenario of malfunctioning clients, encompassing those employing an ANA, SFA, or training on data with artifacts. The results are presented in Figure \ref{fig:unreliable results} and comprehensive results are in Appendix \ref{sec:app_results}. In scenarios with full but impractical knowledge about the number of malfunctioning clients, MKrum demonstrates slightly superior performance, leading to an average accuracy difference of 2.8\% compared to ASMR. However, ASMR is the only method that exhibits robustness against scenarios with a dynamically changing number of malfunctioning clients.


\section{Conclusion}
In this work, we systematically explore various instances of malfunctioning updates that can compromise the integrity of the global aggregated model within a federated learning system. Malicious clients may intentionally degrade the global model, while unreliable clients may train on disadvantageous data. Our findings underscore the deleterious impact of incorporating malfunctioning clients into the aggregation process. To mitigate these negative effects on the global model, we propose an out-of-the-box solution named ASMR that circumvents the need for hyperparameters or prerequisites, while setting an automatically adapting decision boundary for excluding clients from aggregation. This method effectively detects and excludes malfunctioning updates from the aggregation process. Our results demonstrate that our approach offers protection capabilities comparable to or better than state-of-the-art methods, even when those methods rely on unrealistic but necessary knowledge about the number of malfunctioning clients. We additionally presented findings highlighting the significance of an automatically adapting decision boundary, demonstrating the robustness of our method in the face of a dynamically changing number of malfunctioning updates. In summary, our experiments were conducted on a homogeneous dataset featuring significant cases relevant to medical imaging. Moving forward, it is imperative to explore additional cases, particularly those involving heterogeneous datasets.

\newpage

\midlacknowledgments{This work was supported by the Bundesministerium für Bildung und Forschung (BMBF) with grant [01KD2210B].}

\bibliography{midl24_113}


\appendix

\section{Training Details}
\label{sec:details}

We put two tables here to give a structured overview of the classification of malfunctioning updates (Table~\ref{tab:updates}) and the details of our training setup (Table~\ref{tab:experiments}).

\begin{table}[htbp]
   \floatconts
    {tab:updates}
  {\caption{Malfunctioning Clients}}
{  
\begin{tabular}{ |p{2cm}|p{1.35cm}|p{1.2cm}|p{4cm}| p{4.65cm}|}

 \hline
 
  \hline
 Malfunction & Type & Severity & Global Model Decrease & Implementation\\
 \hline
\hline
 %%%%%%%  TPR    FPR    Acc  TPR   FPR   Acc  !!!!   TPR    FPR   Acc     TPR     FPR    Acc    
 Malicious   & SFA       & Strong   & random performance & Changes direction of vectors \\
  \hline
 Malicious   & ANA       & Low      & 20-30\% &  Adds Gaussian noise\\
  \hline
 Unreliable  & Artifacts  & Middle   & 50-60\% & Artifacts on training data \\
  \hline
 %
\end{tabular}
}
\end{table}


\begin{table}[htbp]
   \floatconts
    {tab:experiments}
  {\caption{Experimental Setup}}
{  
\begin{tabular}{ |p{4.5cm}||p{2.65cm}|p{2.65cm}|}
 \hline
 \multicolumn{1}{|c||}{} & \multicolumn{1}{|c||}{Fixed} & \multicolumn{1}{|c|}{Dynamic} \\
 \hline
 \hline

 %%%%%%%  TPR    FPR    Acc  TPR   FPR   Acc  !!!!   TPR    FPR   Acc     TPR     FPR    Acc    
 Total clients & 10 & 10 \\
 \hline
 Malfunctioning clients & 3 & 4 \\
 \hline
 Malfunctioning probability & 100\% & 75\% \\
 \hline
 Local epochs & 1 & 1 \\
 \hline
 Aggregation method & FedAvg & FedAvg \\
 \hline
 Data split & \multicolumn{2}{c|}{random 0.7 / 0.3 train test split}\\
 \hline
 %
\end{tabular}
}
\end{table}


\section{Impact of Malfunctioning Clients}
\label{sec:motivation}
Figure \ref{fig:motivation} demonstrates the impact if no defense mechanism is applied to a FL system containing three malfunctioning clients.
\begin{figure}[h]
    \centering

    \includegraphics[width=\textwidth]{figures/motivation.png}
    \caption{These graphs showcase the deleterious effects on the test accuracy of the global model over twelve rounds, underscoring the impact of three (out of ten) malfunctioning clients, operating without safeguards.}
    \label{fig:motivation}
\end{figure}

\section{Results}
\label{sec:app_results}
The subsequent tables present exhaustive results from the conducted experiments. In the notation, \textit{Fixed} signifies experiments involving a consistent number of malfunctioning clients, while \textit{Dynamic} indicates experiments where the number of malfunctioning clients varies dynamically. The reported values represent averages across the assessed seeds.

\begin{table}[htbp]
   \floatconts
    {tab:malicious}
  {\caption{Malicious Clients}}
{  
\begin{tabular}{ |p{1.5cm}||p{0.65cm}|p{0.65cm}|p{0.65cm}|| p{0.65cm}| p{0.65cm}| p{0.65cm}||p{0.65cm}|p{0.65cm}|p{0.65cm}||p{0.65cm}|p{0.65cm}|p{0.65cm}|}
 \hline
 \multicolumn{1}{|c||}{} & \multicolumn{6}{|c||}{Fixed} & \multicolumn{6}{|c|}{Dynamic} \\
 \hline
 \hline
  \multicolumn{1}{|c||}{} & \multicolumn{3}{|c||}{ANA}& \multicolumn{3}{|c||}{SFA}& \multicolumn{3}{|c||}{ANA}& \multicolumn{3}{|c|}{SFA}\\
  \hline
 Methods & TPR & FPR & Acc & TPR & FPR & Acc & TPR & FPR & Acc & TPR & FPR & Acc\\
 \hline
 \hline
 %%%%%%%  TPR    FPR    Acc  TPR   FPR   Acc  !!!!   TPR    FPR   Acc     TPR     FPR    Acc    
 DnC   & .872 & .055 & .885 & 1. & .0 & .928         & .789 & .099 & .868      & .835 & .091  & .185\\
  \hline
 CFL   & .514 & .0   & .832 & 1. & .0 & .926         & .542   & .019   & .852  & 1. & .007   & .934\\
  \hline
 MKrum & 1.   & .0   & .926 & 1. & .0 & .933         & .842   & .084   & .902  & .877   & .096   & .138\\
  \hline
 ASMR  & 1.   & .001 & .924 & 1. & .0 & .932         & 1.   & .024   & .925    & 1. & .0 & .931\\
 \hline
 %
\end{tabular}
}
\end{table}


\begin{table}[htbp]
   \floatconts
    {tab:unreliable}
  {\caption{Unreliable and Malfunctioning Clients}}
{  
\begin{tabular}{ |p{1.5cm}||p{0.65cm}|p{0.65cm}|p{0.65cm}|| p{0.65cm}| p{0.65cm}| p{0.65cm}||p{0.65cm}|p{0.65cm}|p{0.65cm}||p{0.65cm}|p{0.65cm}|p{0.65cm}|}
 \hline
  \multicolumn{1}{|c||}{} & \multicolumn{6}{|c||}{Fixed} & \multicolumn{6}{|c|}{Dynamic} \\
 \hline
 \hline
  \multicolumn{1}{|c||}{} & \multicolumn{3}{|c||}{Artifacts}& \multicolumn{3}{|c||}{Combined}& \multicolumn{3}{|c||}{Artifacts}& \multicolumn{3}{|c|}{Combined}\\
  \hline
 Methods & TPR & FPR & Acc & TPR & FPR & Acc & TPR & FPR & Acc & TPR & FPR & Acc\\
 \hline
 \hline
 DnC   & .908 & .039 & .8     & .678 & .14  & .321 & .771 & .078 & .772 & .59 & .184 & .296\\
  \hline
 CFL   & 1.   & .0   & .929   & .675 & .0   & .674 & 1. & .005 & .937 & .685 & .039 & .46\\
  \hline
 MKrum & 1.   & .0   & .931   & 1.   & .0   & .928 & .833 & .071 & .621 & .868 & .095 & .798\\
  \hline
 ASMR  & 1.   & .0   & .931   & .956 & .006 & .9 & 1. & .0 & .934 & .98 & .04 & .901\\
 \hline
 %
\end{tabular}
}
\end{table}







\end{document}


%%\midlacknowledgments{This work was supported by the Bundesministerium f¨ur Bildung und Forschung (BMBF) with grant [01KD2210B]}