% \documentclass{uai2022} % for initial submission
\documentclass[accepted]{uai2022} % after acceptance, for a revised
                                    % version; also before submission to
                                    % see how the non-anonymous paper
                                    % would look like
%% There is a class option to choose the math font
% \documentclass[mathfont=ptmx]{uai2022} % ptmx math instead of Computer
                                         % Modern (has noticable issues)
% \documentclass[mathfont=newtx]{uai2022} % newtx fonts (improves upon
                                          % ptmx; less tested, no support)
% NOTE: Only keep *one* line above as appropriate, as it will be replaced
%       automatically for papers to be published. Do not make any other
%       change above this note for an accepted version.

%% Choose your variant of English; be consistent
\usepackage[american]{babel}
% \usepackage[british]{babel}

%% Some suggested packages, as needed:
\usepackage{natbib} % has a nice set of citation styles and commands
    \bibliographystyle{plainnat}
    \renewcommand{\bibsection}{\subsubsection*{References}}
\usepackage{mathtools} % amsmath with fixes and additions
% \usepackage{siunitx} % for proper typesetting of numbers and units
\usepackage{booktabs} % commands to create good-looking tables
\usepackage{tikz} % nice language for creating drawings and diagrams

%% Provided macros
% \smaller: Because the class footnote size is essentially LaTeX's \small,
%           redefining \footnotesize, we provide the original \footnotesize
%           using this macro.
%           (Use only sparingly, e.g., in drawings, as it is quite small.)
\usepackage{soul}
% \newcommand{\tabincell}[2]{\begin{tabular}{@{}#1@{}}#2\end{tabular}} 
\usepackage{graphicx}
\usepackage{float} 
\usepackage{caption}
\usepackage{graphicx}
\usepackage{float} 
\usepackage{amsfonts}
%\usepackage{subfigure}
\usepackage{subcaption}
%% Provided macros
% \smaller: Because the class footnote size is essentially LaTeX's \small,
%           redefining \footnotesize, we provide the original \footnotesize
%           using this macro.
%           (Use only sparingly, e.g., in drawings, as it is quite small.)

%% Self-defined macros
\newcommand{\swap}[3][-]{#3#1#2} % just an example

\title{SASH: Efficient Secure Aggregation Based on SHPRG For Federated Learning}

% The standard author block has changed for UAI 2022 to provide
% more space for long author lists and allow for complex affiliations
%
% All author information is automatically removed by the class for the
% anonymous submission version of your paper, so you can already add your
% information below.
%
% Add authors
\author[1,3]{\href{mailto:<jj@example.edu>?Subject=Your UAI 2022 paper}{Zizhen Liu}{}}
\author[2]{Si Chen}
\author[1,3]{Jing Ye}
\author[2]{Junfeng Fan}
\author[1,3]{Huawei Li}
\author[1,3]{Xiaowei Li}

% Add affiliations after the authors
\affil[1]{%
   Institute of Computing Technology\\
   Chinese Academy of Sciences\\
    Beijing, China
}
\affil[2]{%
    Open Security Research\\
    Shenzhen, China
}
\affil[3]{%
    CASTEST\\
     Beijing, China
  }
  
  \begin{document}
\maketitle

\begin{abstract}
To prevent private training data leakage in Federated Learning systems, we propose a novel secure aggregation scheme based on a seed-homomorphic pseudo-random generator (SHPRG), named SASH. SASH leverages the homomorphic property of SHPRG to simplify the masking and demasking scheme, which, for each of the clients and for the server, entails an overhead linear w.r.t.\ model size and constant w.r.t.\ number of clients. We prove that even against worst-case colluding adversaries, SASH preserves training data privacy, while being resilient to dropouts without extra overhead. We experimentally demonstrate that SASH significantly improves the efficiency, by up to $20\times$ over the baseline, especially in the more realistic case where the number of clients and the model size become large, and a certain percentage of clients drop out from the system.

\end{abstract}

\section{Introduction}\label{sec:intro}
In Federated Learning (FL), multiple participants collaborate to train a machine learning model without putting together their raw training data \citep{RN31}. In the scenario of horizontal FL, a central coordinator updates the global model with the aggregation of the clients’ local model updates. However, as recent studies argue, model inference attacks can compromise the privacy of training data from the information of model update \citep{RN34, RN23}, which puts forward requirements that the model update should be exchanged in a secure way. A secure aggregation solution for FL aims to solve this problem, and typically considers the following aspects:
\begin{enumerate} 
    \item Efficiency: the computation and communication cost introduced by the scheme, and the scalability to a large number of clients and parameters.
    \item Security: the threat model of the scheme, including the goal of the adversaries, whether the adversaries collude, and the maximal number of colluding participants. 
    \item Practicality: the robustness of the scheme against client dropouts, and whether its implementation is compatible with a common Internet environment.
    \item Accuracy: quality of the trained model, including peak accuracy and convergence speed.
\end{enumerate}

Existing privacy solutions for FL apply privacy protection techniques, including Secure Multiparty Computation
(SMC) \citep{RN44, RN167, RN168, RN169, RN170, RN27}, Homomorphic Encryption (HE) \citep{RN48, RN168}, and Differential Privacy (DP) \citep{RN49} for various practical scenarios. However, when the number of clients, model size, and the dropout rate become large in real-world applications, it is still challenging to construct a secure and efficient aggregation scheme.

SecAgg \citep{RN44} is one of the most practical solutions to provide privacy guarantees in horizontal FL. In SecAgg, every pair of clients $u,v$ in the set of clients $\mathcal{U}$ securely agrees upon a masking seed $s_{u,v}$. Then for each client $u$ in $\mathcal{U}$, the seed $b_u$ of another mask is generated. The message of client $u$ is masked as $y_u=m_u+\textrm{PRG}(b_u)+\sum_{v<u}\textrm{PRG}(s_{u,v})-\sum_{v>u}\textrm{PRG}(s_{u,v})$. All the 
masking seeds $b_u$ and $s_{u,v}$ are also secret-shared among all clients, which allows the reconstruction upon sufficient shares. 
After receiving the masked values, the aggregator can reconstruct $b_u$ and remove 
$\textrm{PRG}(b_u)$ from $y_u$ if $u$ is still online, or reconstruct $s_{u,v}$ for every other online client $v$ and remove all
$\textrm{PRG}(s_{u,v})$s from $y_u$ if $u$
drops out. The pairwise masking scheme entails computation complexities of $O(N^2M)$
for the aggregator, and $O(MN)$ for each client, where $N$ is the number of clients, and $M$ is the number of parameters in the model under training. This quadratic overhead in $N$ may limit its practical applications to FL systems with thousands of clients. Several subsequent works \citep{RN167, RN168, RN169} make the secure aggregation more efficient but will incur new restrictions, such as weakened dropout resilience or increased communication costs.

To further improve the efficiency of SecAgg, this work develops a novel secure aggregation scheme based on seed-homomorphic pseudo-random generator (SHPRG) \citep{RN81}, which has the property $\sum \textrm{SHPRG}(k_i)\approx \textrm{SHPRG}(\sum k_i)$. For each client, instead of masking the data with $N$ PRG outputs, one mask is sufficient. The masked data is $y_u=m_u+\textrm{SHPRG}(k_u)$, where $k_u$ is a self-generated seed. If the aggregator can securely get the sum of a subset of $\{k_u\}$, then the aggregator can remove the mask of the masked aggregation result by computing $\sum m_u=\sum y_u-\textrm{SHPRG}(\sum k_u)$. If some clients drop out during the process, the remaining clients’ masked data are still valid, and the aggregator can get the correct sum, as long as the aggregator can securely get the sum of masking seeds of the surviving clients.

Overall, in this paper, we propose an efficient secure aggregation scheme for federated learning, named SASH. Our construction has the following traits: 
\begin{itemize}
     \item Masking and demasking are simpler and more efficient than in previous solutions, resulting in computation complexities of $O(M)$ for each of the clients and for the aggregator, while the communication cost is the same as that of state-of-the-art (SOTA) schemes. 
    \item Our scheme is robust to up to $D_{\textrm{max}}$ arbitrary dropouts, and in the concrete instantiation, $D_{\textrm{max}}=N/3$. In addition, the enhancement of efficiency is more significant when dropouts occur.
    \item The model aggregation is proved to achieve training data privacy against up to $T_{col}$ colluding clients, and in the concrete instantiation, $T_{col}=2N/3$. 
\end{itemize}

In the following sections, we give detailed construction of our secure aggregation scheme, and show that it achieves the four aspects of requirements for a practical and secure FL system.

\section{Problem Statement and Background}
In this section, we first formulate the problem we target, and then review previous research and the cryptographic primitives needed for our constructions.
\subsection{Problem statement}
In this paper, we focus on the privacy of the typical horizontal federated learning, where $N$ data owners (also called clients) collaboratively train a model with $M$ parameters with the coordination of an aggregator (also called the server). $N$ can range from a hundred to tens of millions \citep{RN68} and $M$ may scale to millions \citep{bonawitz2019towards}. To avoid the inference of training data privacy from the exchanged model update $m_u$ during the learning process \citep{274683}, secure aggregation aims to learn $\sum{m_u}$ without revealing additional sensitive information beyond the model aggregation.

The threat model is honest-but-curious, and allows colluding. The potential adversaries in FL may be clients and the aggregator who can get access to the exchanged data. In colluding case, the adversary may control a set of up to $T_{col}$ clients, and may also control the aggregator. The independent or colluding adversaries can attempt to infer sensitive information based on the viewed intermediate data, such as the original individual model update, which can be utilized to infer the training data of some clients.

Dropout is another challenge that may interrupt secure aggregation. A random subset of up to $D$ clients may drop out of the system at any point of time during the execution of secure aggregation. This may cause model aggregation to fail or result in a wrong global model.

In the proposed protocol, while keeping the trained model's accuracy unaffected, and keeping the implementation compatible with a common Internet environment, we aim to construct an efficient secure aggregation scheme, which protects the privacy of clients' data in colluding cases, and is robust against a significant portion of client dropouts.


\subsection{Related work}
We briefly review privacy solutions for horizontal FL in this section.

HE provides a general solution for security and privacy enhancements of FL \citep{RN48, RN168}. Many recent works advocate the use of additively HE schemes, notably Paillier \citep{RN5}, as the primary means of privacy guarantee in FL. HE performs complex cryptographic operations that are relatively expensive to compute. 
The reference develops a simple batch encryption technique based on new quantization and encoding schemes to improve efficiency. However, questions arise about the collusion threats. 

DP is a rigorous mathematical framework to improve the privacy of the machine learning model by introducing a level of uncertainty into the released model \citep{RN49}. With carefully added randomness to training data and/or trained models, DP can protect the privacy of individual samples in the dataset. DP can be used in combination with our scheme to provide further security guarantees.

SMC guarantees that a set of parties compute a function in a way that each one cannot learn anything except the output, and different SMC protocols such as SPDZ protocol \citep{RN170} and threshold homomorphic encryption \citep{RN27} have been utilized in the privacy-preserving FL framework. A notable work is the secure aggregation protocol proposed by Bonawitz et al. \citep{RN44}. As reviewed in Section~\ref{sec:intro}, they developed a double masking solution, which achieves secure aggregation against colluding participants, and is robust to dropouts. However, the quadratic growth of computation overhead w.r.t.\ $N$ is the major bottleneck. Several subsequent works improve the efficiency based on the framework of SecAgg. One-shot reconstruction of the aggregate-mask was employed in a recent work \citep{RN175}, but can only work with a trusted third party (TTP). TurboAgg utilizes a circular communication topology to reduce the communication and computation overhead \citep{RN167}. SecAgg+ achieves polylogarithmic communication and computation per client via communication graph \citep{RN169}. FastSecAgg presents an FFT-based multi-secret sharing scheme to obtain $O(M\log N)$ cost \citep{RN168}. However, in SecAgg+, TurboAgg, and FastSecAgg, the robustness to dropouts and/or security guarantees are weaker than those of the original SecAgg.

\subsection{Cryptographic Tools}
\paragraph{Seed Homomorphic Pseudorandom Generator.} Recall that a pseudorandom generator (PRG) is a deterministic polynomial-time algorithm $F:\{0,1\}^l\rightarrow \{0,1\}^n$ such that $l<n$, and for randomly distributed $s\in \{0,1\}^l$ and $r\in \{0,1\}^n$, the distributions of $F(s)$ and $r$ are computationally indistinguishable. A PRG $F:\chi \rightarrow \gamma $, where $(\chi,\oplus)$ and $(\gamma, \otimes )$ are 
groups, is said to be seed homomorphic if the following property holds \citep{RN39}: 
For every $s_1, s_2\in \chi $, we have that $F(s_1)\otimes F(s_2)=F(s_1\oplus s_2)$.

A Seed Homomorphic Pseudorandom Generator (SHPRG) can be constructed based on the Learning With Rounding (LWR) problem as $G(s)=\left \lceil A^{T}\cdot s\right \rfloor_p$, where $n,m,p,q$ satisfying $p<q, n<m$ are public parameters, $A$ is another public parameter randomly sampled from $\mathbb{Z}_q^{n\times m}$, and $\lceil\cdot\rfloor_p$ is defined as ${\left \lceil x \right \rfloor}_p=\left \lceil x\cdot p/q \right \rfloor$ for $x\in \mathbb{Z}_q$. It is almost seed homomorphic in the following sense: 
\begin{equation}
G(s_1+s_2)=G(s_1)+G(s_2)+e, \quad e\in \{-1,0,1\}^{m}.
\end{equation}
Note that the security of the above SHPRG depends on the hardness of the LWR$_{n,q,p}$ problem \citep{RN76}. The value of $1/p$ is proportional to the error rate $\alpha$ in Learning With Errors (LWE) \citep{10.1145/1568318.1568324}, so the parameters should be selected such that LWE$_{n,q,1/p}$ is hard enough to meet the target security level. 

Multiple privacy-critical applications have been built from Seed Homomorphic PRGs or the related primitive, Key Homomorphic Pseudorandom Functions, such as distributed PRFs, updatable encryption \citep{RN39} and private stream aggregation \citep{RN171, RN172}. The homomorphism property supports such applications with provable security.

\section{SASH: Secure Aggregation Based on SHPRG}
In this section, we present an efficient privacy-preserving aggregation scheme based on SHPRG combining two layers of protocols: the Homomorphic Model Aggregation (HMA) protocol and the Masking Key Agreement (MKA) protocol. Figure~\ref{fig:overall} depicts the overall process of the mechanism for one epoch. The model updates are securely shared and computed following the HMA protocol, which calls the MKA protocol to return the demasking key to the aggregator and enable demasking to obtain the global model update. The process is repeated until the global model converges. Next, we will describe the two protocols in detail respectively.
\begin{figure}
	\centering
		\includegraphics[scale=0.35]{liu_405/overall.png}
	\caption{The Overall Process of SASH.}
	\label{fig:overall}
\end{figure}
\subsection{The Homomorphic Model Aggregation protocol}
In the Homomorphic Model Aggregation Protocol, the aggregation of clients’ local models is computed under the orchestration of the aggregator, ensuring no information about the individual models is revealed beyond their aggregated value. As shown in Figure~\ref{FIG:HMA}, the inputs of HMA are model updates of all related clients in the initial set $\mathcal{U}_0$, which are denoted as $m_{\mathcal{U}_0}$. Each client $u$ first utilizes SHPRG to generate the mask $G(k_u)$ for the current epoch. They take the masking key, which is a randomly sampled vector, as the input to the SHPRG, and stretch it to a mask for each entry of the model update. Then they upload the masked model updates to the aggregator. 
\begin{figure*}
\fbox{%
  \parbox{\textwidth}{%
    \begin{center}
    \textrm{
    \centering{\textbf{Homomorphic Model Aggregation Protocol}}
    \leftline{\textbf{Parameter}: a random matrix $A\overset{R}{\leftarrow}\mathbb{Z}_{q}^{\mu \times M},\mu, q, p, M\in\mathbb{N}$, with $q>p, \mu<M$} 
    \leftline{\textbf{Input}: $m_{\mathcal{U}_0}=\{m_u\}_{u\in \mathcal{U}_0}$ for the clients;}
    \leftline{\textbf{Output}: $m_0=\sum_{u\in \mathcal{U}_2} m_u$;}
    \leftline{\emph{Client $u$}:}
    \leftline{1: Generate the masking key $k_u$ by sampling random vector of $\mu$ entries.}
    \leftline{2: Preprocess the model update $m_u$ and encrypt the quantized model update $x_u$ to return $y_u=x_u+G(k_u) \mod P$.}
    \leftline{3: Upload $y_u$ to the server.}
    \leftline{\emph{Server}:}
    \leftline{1: Collect $y_u$ of all clients, and call the Masking Key 
    Agreement protocol which returns $k_0=\sum_{u\in \mathcal{U}_2} k_u$.}
    \leftline{2: Do the aggregation $y_0 = \sum_{u\in \mathcal{U}_2} y_u$, and unmask it by computing $x_0=y_0-G(k_0)$.}
    \leftline{3: Dequantize $x_0$ to obtain the final aggregation model update $m_0=\sum_{u\in \mathcal{U}_2} m_u$, and broadcast the averaged aggregation}\\
    \leftline{ $m_0/N_2$ to online clients for the next training.}
    }
    \end{center}
}%
}
\caption{The Homomorphic Model Aggregation Protocol.}	\label{FIG:HMA}
\end{figure*}

During this step for masking and uploading data, some clients may drop out. We denote the set of clients that have successfully uploaded masked data as $\mathcal{U}_1$. Clients in $\mathcal{U}_1$ and the aggregator run the MKA protocol, and some further client dropouts may happen. We denote the set of surviving clients after MKA as $\mathcal{U}_2$, and the aggregator should obtain $k_0 = \sum_{u\in \mathcal{U}_2}k_u$ from the MKA protocol. The aggregator then
sums up the model updates of clients in $\mathcal{U}_2$ and removes the mask which is $G(k_0)$. Subsequently, the aggregator dequantizes the result before 
computing the average over $N_2$ clients in set $\mathcal{U}_2$ and broadcasts the final aggregated model update. 

We instantiate the protocol by the almost seed homomorphic PRG introduced in Section 2. Since the output of SHPRG is in $\mathbb{Z}_{p}^{m}$, we set the public modulus $P$ in our scheme equal to $p$. We quantize the model updates by converting each bounded local model update to $w$-bit integer before adding masks. For a model update $m$ in $[m_{\textrm{min}}, m_{\textrm{max}})$, the quantized value of $m$ is \begin{equation}\label{1}
  Q(m)=\left\lfloor\frac{2^w(m-m_{\textrm{min}})}{m_{\textrm{max}}-m_{\textrm{min}}}\right\rfloor,
\end{equation}
where $\left\lfloor a \right\rfloor$ is the flooring function that maps $a\in\mathbb{R}$ to the largest integer not greater than $a$. The aggregation of quantized value over $N$ parties is at most $N(2^w-1)$, so we set $p>N(2^w-1)$ to make sure the summed model update does not overflow. For summation result $x$, the corresponding dequantization is performed by 
\begin{equation}\label{2}
 Q^{-1}(x) = 2^{-w}(m_{\textrm{max}}-m_{\textrm{min}})x+Nm_{\textrm{min}}.
\end{equation}

\subsection{The Masking Key Agreement Protocol}
In the Masking Key Agreement (MKA) Protocol, each client $u$ holds the masking seed $k_u$, and the aggregator and online clients in $\mathcal{U}_2$ collaboratively compute the sum of the masking keys of online clients $\sum_{u\in \mathcal{U}_2} k_u$ without disclosing the individual values to other clients or the aggregator. Various privacy protection approaches can be chosen to achieve secure aggregation. 

In particular, the protocol of SecAgg \citep{RN44} can be called to implement MKA. The original SecAgg protocol performs secure aggregation of the model updates exchanged during the FL 
learning process, while as an implementation of MKA, it only aggregates the SHPRG seeds. SecAgg is robust against user dropouts less than some threshold, and the rest of HMA is not affected by user dropouts, so the whole protocol is tolerant to dropouts. 

We can instantiate MKA with other secure aggregation solutions as well. As a tradeoff between efficiency and security/robustness, we can utilize SecAgg+ \citep{RN169}, FastSecAgg \citep{RN168}, or TurboAgg \citep{RN167} to reduce the computation and communication overhead within MKA further. Note that in these schemes, the security and robustness are somewhat weaker than those of SecAgg and the ideal HMA, and the chosen MKA scheme determines the security and robustness of the overall HMA scheme. If one wants to further guarantee privacy against malicious participants, this protocol can also be combined with authentication or correctness verification.
\subsection{Correctness and Security}
In this section, we state our correctness and security theorems. We consider clients in $\mathcal{U}_0$ and the server $A$ execute the HMA protocol with inputs $m_{\mathcal{U}_0}=\{m_u\}_{u\in \mathcal{U}_0}$, $\left | \mathcal{U}_0 \right |=N, \mathcal{U}_0\supseteq  \mathcal{U}_1 \supseteq \mathcal{U}_2$.  
\newtheorem{theorem}{Theorem}
\begin{theorem}[Correctness]
If participants in $\mathcal{U}_2$  follow the protocol, regardless of dropouts in  $\mathcal{U}_0\setminus \mathcal{U}_2$ (entries that are in $\mathcal{U}_0$
but not in $\mathcal{U}_2$), the server can obtain $\sum_{u\in \mathcal{U}_2}m_u$ with negligible noise based on the given $k_0=\sum_{u\in \mathcal{U}_2}k_u$, where $\left | \mathcal{U}_2 \right |=N_2$.
\end{theorem}


Proof: Because the selected PRG is almost seed-homomorphic, we have:
\begin{equation}
\begin{aligned}
\sum_{i=1}^{n}G(k_i)&=G\Bigl(\sum_{i=1}^{n}k_i\Bigr)+e \mod p,\\& \textrm{where } e\in \{-n+1,\dots,0,1,\dots,n-1\}.
\end{aligned}
\end{equation}
For the HMA protocol, $y_u=m_u+G(k_u)\mod P$, where
$G(k_u)\in \mathbb{Z}_p^M, P=p,p \geq  N(2^w-1)+1 \geq  N_2(2^w-1)+1$, we have:
\begin{equation}
\begin{aligned}
m_0&=\sum_{u\in \mathcal{U}_2}y_u-G(k_0)\mod P\\
&=\sum_{u\in \mathcal{U}_2}(m_u+G(k_u))-G(\sum_{u\in \mathcal{U}_2}k_u)\mod P\\
&=\sum_{u\in \mathcal{U}_2}m_u+\sum_{u\in \mathcal{U}_2}G(k_u)-G(\sum_{u\in \mathcal{U}_2}k_u)\mod P\\
&=\sum_{u\in \mathcal{U}_2}m_u+e_0\mod P
\end{aligned}
\end{equation}
where $e_0\in\{-N_2+1,...,0,1,...,N_2-1\}$. The noise here is insignificant relative to the domain of 
aggregated quantized model updates ranging in $N_2(2^w-1)$, which can be demonstrated to have a negligible impact on the quality of the trained model. 

Theorem 2 below shows that HMA is secure against colluding participants, which may contain the aggregator, irrespective of how and when clients drop out. Those clients and the aggregator learn nothing more than their own inputs, and the sum of the inputs and masks of the other clients.

We consider executions of HMA with privacy threshold $T_{col}$, and underlying cryptographic primitives are instantiated with security parameters $\Lambda$. In such a protocol execution, the view of a client $u$ consists of its internal state (including its model update $m_u$, masking seed $k_u$, mask $G(k_u)$, the aggregated model update $\sum_{u'}m_{u'}$) and all messages this party received from other parties. The view of the server $A$ consists of the received information, including demasking seed $k_0$ and the masked model updates $\{y_u\}$ where $u\in \mathcal{U}_0$.

Given any subset $\mathcal{V}\subset \mathcal{U}_0\cup A$, let $\textsf{REAL}_{\mathcal{V}}^{\mathcal{U}_0,T_{col},\Lambda}$ be a
random variable representing the combined views of all parties in
$\mathcal{V}$ in the execution of HMA, where the randomness is over the internal randomness of all parties, and the randomness in the setup phase. We show that for any such set $\mathcal{V}$ of honest-but-curious clients of size up to $N-2$, the joint view of $\mathcal{V}$ can be
simulated given the inputs of the clients in $\mathcal{V}$, and the sum of the inputs and masks of the other clients.

\begin{theorem}[Security]
 There exists a PPT simulator $\textsf{SIM}$ such that for all $\mathcal{U}_0,m_{\mathcal{U}_0}, \mathcal{U}_1, \mathcal{U}_2$ and $\mathcal{V}\subset \mathcal{U}_0\cup \{A\}, \left | \mathcal{V}\setminus\{A\} \right |<N-1 $, the output of $\textsf{SIM}$ is computationally indistinguishable from the joint view $\textsf{REAL}_{\mathcal{V}}^{\mathcal{U}_0,T_{col},\Lambda}$ of the parties in $\mathcal{V}$:
\begin{equation}
\begin{aligned}
&\textsf{REAL}_{\mathcal{V}}^{\mathcal{U}_0, T_{col}, \Lambda}(m_{\mathcal{U}_0}, \mathcal{U}_1, \mathcal{U}_2)\approx \\&\textsf{SIM}_{\ \mathcal{V}}^{\ \mathcal{U}_0,  T_{col}, \Lambda}(m_\mathcal{V}, z_m, z_k,  \mathcal{U}_1, \mathcal{U}_2)\\
&z_m=\sum_{u\in\mathcal{U}_2\setminus \mathcal{V}}m_u,z_k=\sum_{u\in\mathcal{U}_2\setminus \mathcal{V}}{G(k_u)}  
\end{aligned}
\end{equation}
\end{theorem}

Proof: We prove the theorem by a standard hybrid argument. We 
will present a series of hybrids from variable \textsf{REAL} to \textsf{SIM} where any two subsequent random variables are computationally 
indistinguishable. We assume that $A\in \mathcal{V}$, which indicates the view of the server should be considered. The case of $A$ not in $\mathcal{V}$ is much easier to prove and is omitted for brevity.
\begin{itemize}
\item[$\textsf{Hyb}_0$] In this hybrid, the variables are distributed exactly as in \textsf{REAL}. We choose a specific client ${u}'$ in $\mathcal{U}_2\setminus \mathcal{V}$. For this client, based on the given $z_m$ and $z_k$, we can write $y_{{u}'}=m_{{u}'}+\textrm{G}(k_{{u}'})=z_m+z_k-\sum_{u\in\mathcal{U}_2\setminus \mathcal{V}\setminus \{{u}'\}}y_u$.
\item[$\textsf{Hyb}_1$] In this hybrid, for a party $u$ in $\mathcal{U}_2\setminus \mathcal{V}\setminus \{u'\}$, in HMA protocol instead of sending $y_u=m_u+G(k_u)$, we send $y_u=m_u+P_u$, where $P_u$ is uniformly random. For ${u}'$, the masked data is still generated by $y_{{u}'}=z_m+z_k-\sum_{u\in\mathcal{U}_2\setminus \mathcal{V}\setminus \{u'\}}y_u$. The security of SHPRG guarantees that the distribution of $\{y_u: u\in \mathcal{U}_2\setminus \mathcal{V}\setminus \{{u}'\}\}$ is computationally indistinguishable from the corresponding one in $\textsf{Hyb}_0$. On the other hand, $y_{{u}'}$ is determined by $\{y_u: u\in \mathcal{U}_2\setminus \mathcal{V}\setminus \{{u}'\}\}$, $z_m$ and $z_k$, so the distribution of $\{y_u: u\in \mathcal{U}_2\setminus \mathcal{V}\}$ is computationally indistinguishable from that in $\mathsf{Hyb}_0$.
\item[$\textsf{Hyb}_2$] In this hybrid, for party $u$ in $\mathcal{U}_0\setminus \mathcal{U}_2\setminus \mathcal{V}$, the simulator can just substitute their inputs of HMA protocol by uniform random vectors. Since the server will not do aggregation on their inputs and has no access to the values, the joint view of the parties in $\mathcal{V}$ does not depend on their inputs. Consequently, the joint view of the participants will be identical to the previous one. 
\item[$\textsf{Hyb}_3$] In this hybrid, for party $u$ in $\mathcal{U}_2\setminus \mathcal{V}\setminus \{{u}'\}$, we replace the uploaded data in HMA protocol by $y_u=P_u$, which is possible because $P_u$ was chosen in $\textsf{Hyb}_1$ to be uniformly random, so $m_u+P_u$ is also uniformly random. For the chosen client ${u}'$, its uploaded data is still computed by $y_{{u}'}=z_m+z_k-\sum_{u\in\mathcal{U}_2\setminus \mathcal{V}\setminus \{{u}'\}}y_u$, which makes the joint view of clients in $\mathcal{U}_2\setminus \mathcal{V}$ consistent with the previous one, and the joint distribution of the data uploaded by clients in $\mathcal{U}_2$ stays identical. Hence the joint view of the participants including the server is indistinguishable from the previous hybrid.
\end{itemize}

Thus, the PPT simulator \textsf{SIM} that samples from the distribution described in the last hybrid produces an output that is computationally indistinguishable from \textsf{REAL}, and this distribution can be computed based on $m_{\mathcal{V}},z_m,z_k$. The simulation does not restrict the number of joint viewed parties, which means HMA can preserve the security against the aggregator colluding with an arbitrary subset of up to $N-2$ clients.
\section{Evaluation}
In this section, we perform a detailed evaluation of SASH from the perspectives of efficiency, accuracy, privacy security and practicality theoretically and experimentally. We compare SASH with SOTA methods \citep{RN44, RN169, RN167, RN168} in Table~\ref{tab:data}. We can see that SASH achieves the best asymptotic computation efficiency for both clients and the aggregator, and exceeds previous methods in other aspects as well. 

Among the existing works, SecAgg is still the most practical method for achieving the best comprehensive performance, including security and robustness to dropouts. We select SecAgg as the baseline. SASH aims to overcome the efficiency bottleneck, without sacrificing other advantages.

\begin{table*}
    \centering
    \newcommand{\tabincell}[2]{\begin{tabular}{@{}#1@{}}#2\end{tabular}}
    \caption{Comparison of Efficiency, Security and Dropout Guarantees of Our Proposed Scheme and the Related Works.}\label{tab:data}

    \begin{tabular}{lccccc}
      \hline
      &\centering\bfseries SecAgg & \centering \bfseries SecAgg+ & \centering\bfseries Turbo &\centering\bfseries FastSec & \bfseries SASH     \\
      \hline 
      \bfseries \tabincell{l}{Computation \\(Client)}& \centering$O(MN+N^2)$& \centering {\tabincell{c}{$O(\log^2N+$\\$M\log N)$}}&\centering {\tabincell{c}{$O(M\log N$\\$\log^2\log N)$}}&\centering{$O(M\log N)$}&$O(M+N^2)$\\
      \hline
      \bfseries \tabincell{l}{Computation \\(Aggregator)}& \centering$O(MN^2)$& \centering \tabincell{c}{$O(N\log^2N+$\\$MN\log N)$}&\centering\tabincell{c}{$O(M\log N$\\$\log^2\log N)$}&\centering$O(M\log N)$&$O(M+N^2)$\\
      \hline
      \bfseries \tabincell{l}{Communication \\(Client)}&\centering $O(M+N)$&\centering$ O(M+\log N)$&\centering$O(M\log N)$&\centering$O(M+N)$&$O(M+N)$\\
       \hline
       \bfseries \tabincell{l}{Communication \\(Aggregator)} &\centering $O(MN+N^2)$&\centering $O(MN+N\log N)$&\centering$O(MN\log N)$&\centering$O(MN+N^2)$&$O(MN+N^2)$\\
       \hline
       \bfseries \tabincell{l}{Security\\$T_{col}$}&  \tabincell{l}{Adaptive \\$2N/3$} & \centering \tabincell{l}{Non-adaptive\\ parameter}&\centering \tabincell{l} {Non-adaptive\\ $N/2$}& \centering \tabincell{l}{Adaptive\\ $N/10$} & \tabincell{l}{Adaptive\\ $N/3$}\\
       \hline
       \bfseries \tabincell{l}{Dropout\\$D_{\textrm{max}}$}&\centering \tabincell{l}{Worst-case \\$N/3$}& \centering \tabincell{l}{Average-case\\ parameter}&\centering \tabincell{l}{Average-case\\ $N/2$}& \centering \tabincell{l}{Average-case \\$N/10$} & \tabincell{l}{Worst-case \\$N/3$} \\
       \hline
    \end{tabular}
\end{table*}
\subsection{Efficiency}
In this section, we analyze the computation and communication cost theoretically, and then validate the analysis with experimentally measured running times. 
\subsubsection{Analysis}
\paragraph{Computation Overhead} In the HMA protocol, the computation cost is mainly derived from computing SHPRGs to generate masks for each entry in the model update vector. The computation costs for clients and the aggregator are both $O(M)$ regardless of the number of clients and the client dropout rate. 

To analyze the efficiency improvement, we take SecAgg as an example implementation. Recall that in SecAgg, the majority of the computation cost comes from the PRG evaluations that expand the various seeds into masks of $M$ entries. For each client, $N$ masks are required for one upload, entailing a computation cost of $O(MN)$. For the aggregator, the computation cost is $O(N^2M)$, which can be broken into $O(MN(1-d)+dMN^2(1-d))$, where $d$ is the fraction of dropped-out clients that incur extra recovery overhead, called effective dropouts. Evidently, the computation cost of the aggregator increases quadratically with $N$ if $d$ is nonzero.
The SecAgg in the MKA protocol is called to only aggregate the masking key of size $\mu$ instead of the model update with size $M$. The complexities of computation here are $O(N)$ and $O(N^2)$ for each client and the aggregator, respectively. 

Our scheme presents a further improvement of efficiency in dealing with dropouts. For the comparison with SecAgg, in addition to the difference of computational cost w.r.t. $N$, our protocol also incurs a much smaller effective dropout fraction $d_0$. This is because the masked model updates are uploaded before calling the MKA protocol, and the aggregator can aggregate and demask the model updates of remaining clients in $\mathcal{U}_2 $ correctly without considering the dropping-out clients in $\mathcal{U}_0 \setminus \mathcal{U}_2$, while the dropout users in $\mathcal{U}_1 \setminus \mathcal{U}_2$ are handled by the 
SecAgg protocol. In other words, 
$d_0\approx \frac{\left | \mathcal{U}_1 \setminus \mathcal{U}_2 \right |}{\left |  \mathcal{U}_0\right |}$. Since the 
clients may drop from the system at any time with a certain probability, the dropout fraction is positively correlated with the 
execution time of the corresponding process. In our solution, the only process that contributes to effective dropouts is the MKA protocol, while in SecAgg it includes the time-consuming step of masking and uploading the model data. Therefore, the ratio of the effective dropout fractions of our scheme and SecAgg is $\frac{d_0}{d}\approx \frac{C_N+\mu}{C_N+M}$, where $C_N$ is a constant related to the number of clients. For $N=500$ and $M=100$k, we get $d_0=d/7$ experimentally.
\paragraph{Communication Overhead}
The main contribution to the communication traffic comes from the HMA protocol, in which the communication cost is $O(M)$ for each client and $O(MN)$ for the aggregator, the same as in plain FL. The only communication involved is the upload of masked models of size $M\log_2{p}$.

For the MKA protocol, the size of each masking key needed to be transferred securely is fixed to $\mu$. The communication cost differs with different secure aggregation solutions, all independent of the number of model parameters. If SecAgg is employed in the MKA protocol, four rounds of communication are needed in the MKA protocol, and the communication cost is $O(N)$ for clients and $O(N^2)$ for the aggregator.
The total amount of data transferred in our scheme is dominated by the collection of masked model data, which is approximately $NM\log_2{p}$. The inflation factor relative to the communication traffic of the plain FL system is $\frac{NM\log_2 p+T}{NMw}\approx \frac{\log_2p}{w}$, where $T$ denotes the size of the data transferred in the MKA protocol. For the selected protocol, the inflation factor is about 2.06 when $N=500, M=10^6, w=16, p=2^{32}$, which is the same as SecAgg. When $M$ or $N$ becomes larger, our inflation factor stays basically constant. In contrast, TurboAgg \citep{RN167} requires at least $\log N$ rounds of communication, with notably increased communication overhead.
\subsubsection{Experimental Results}
To evaluate the schemes experimentally, we implement SASH and SecAgg in C++ with the following settings. We take $w =16$, and set the parameters of the SHPRG used in the HMA protocol as $\mu = 512, p = 2^{32}, q = 2^{64}$, for which the LWE evaluator estimates a hardness of over $2^{128}$. Also, $q/p \geq \sqrt{\mu}$, which ensures the LWR problem appears to be exponentially hard for any $p=\mathrm{poly}(\lambda)$ as described by \citet{RN174}. For the implementation of the MKA protocol, we choose the same cryptographic primitives as the original implementation in SecAgg. All experiments are run on a Lenovo server with Ubuntu 20.04, an Intel(R) Core i7-10700K 3.80GHz CPU, and 32GB RAM.

We measure the running time of secure aggregation of a single FL epoch and compare SASH with the baseline. We further study the impact of model size, the number of clients, and dropout fraction. Since the secure aggregation is independent of the training process, synthesized vectors are used for locally trained models whose elements are encoded to 16-bit unsigned integers to test different model sizes better. The local training time is not included in the total running time, and the entire learning process can be deduced. We execute the tests 500 times and report the average running time and the standard deviation. As the results in Figure~\ref{efficiency} illustrate, we conclude that:
\begin{enumerate}
    \item SASH improves the efficiency, and more importantly, the running time of execution increases more gently when the number of both model parameters and clients increases. Hence, SASH can be scaled to the FL systems with millions of model parameters and thousands of clients. 
    \item The running time of SASH is relatively stable as the user dropout rate increases. The degree of the improvement significantly increases as the dropout fraction increases, and when $d=0.3$, SASH provides a speedup of $20\times$ over SecAgg. This gain is expected to increase further for larger $M$ and $N$.
\end{enumerate}
\begin{figure*}[htbp]
	\centering
	\begin{subfigure}{0.325\linewidth}
		\centering
		\includegraphics[width=1\linewidth]{liu_405/Figure_b2.png}
		\caption{The running time w.r.t.\ $M$}
		\label{12}
	\end{subfigure}
	\centering
	\begin{subfigure}{0.325\linewidth}
		\centering
		\includegraphics[width=1\linewidth]{liu_405/Figure_b3.png}
		\caption{The running time w.r.t.\ $N$}
		\label{2d}% label used to reference this subfigure in the text
	\end{subfigure}
	\centering
	\begin{subfigure}{0.325\linewidth}
		\centering
		\includegraphics[width=1\linewidth]{liu_405/Figure_b1.png}
		\caption{The running time w.r.t.\ $d$}
		\label{3}% label used to reference this subfigure in the text
	\end{subfigure}
	\caption{Running Time of Executions. From left to right, (a) the running time as the number of model parameters increases for an FL system with 500 
clients without dropouts; (b) the running time as the number of clients increases with $M=100$k and $d=0.1$; (c) the running time as the 
dropout fraction increases with $M=100$k and $N=50$. The dotted lines represent the standard deviation of the results.}
	\label{efficiency}
\end{figure*}

SASH is more efficient than SecAgg while providing the same robustness and security. If we relax the requirements on the security guarantee and the robustness to dropouts, we can instantiate MKA with other efficient methods as well. We refer to our scheme that uses SecAgg+ to implement MKA as SASH+. SASH+ provides the same security and dropout resilience as SecAgg+, and improves the efficiency to a much greater extent. The running times for different numbers of clients and different dropout rates are shown in Figure~\ref{fig:toronto}, which further demonstrates that SASH+ is more efficient than SecAgg+ in computation, especially when the number of clients and the dropout rate become larger. As for the communication overheads, they are almost the same for $k=100$.

\begin{figure}
  \centering
  \includegraphics[width=1\linewidth]{liu_405/Figure_1.png}
  \caption{Comparison between SecAgg+, SASH+, SecAgg and SASH.}\label{fig:toronto}
\end{figure}

\subsection{Security}
As proved in Section 3, SASH can provide privacy guarantees against the server colluding with an arbitrary subset of clients in the honest-but-curious setting. For the ideal aggregation scheme, if the aggregator corrupts a set of clients, the remaining clients' partial aggregation results will be disclosed. The information obtained by the colluding participants in SASH is the same as the ideal case. 

The security of the HMA protocol in our scheme presents no restriction on the number of colluding parties, which means $T_{col}$ in our method depends on the implementation of the MKA protocol. As another aspect of privacy guarantee, SASH can mitigate adaptive adversaries. Recall that an adaptive adversary can choose the set of clients to corrupt during the protocol execution. In the proof of Theorem 2, the joint view of $\mathcal{V}$ can be simulated for any subset of parties without any restriction, so the adversary can adaptively choose the corrupted set at any stage of the protocol, which makes no difference to the conversion between hybrids and the final distribution of \textsf{SIM}. In comparison, in SecAgg+ and TurboAgg, since subsets of clients perform secure aggregation in stages, an adaptive adversary may corrupt all clients in such a subset, and cause information leakage. As summarized in Table~\ref{tab:data}, although SecAgg+ and FastSecAgg improve computation and communication efficiency to some extent, the security degrades. 
\subsection{Practicality}

As Table~\ref{tab:data} illustrates, SASH can provide worst-case dropout resilience, which means the protocol can maintain correctness and security against any subset of up to $D_\textrm{max}$ clients dropping out. On the other hand, the average-case dropout robustness is limited to only random dropouts. For the defined dropout tolerance $D_\textrm{max}$, SecAgg+ \citep{RN169} sets it as an adjustable variable, and larger $D_\textrm{max}$ demands more neighbors in the graph to provide security. In 
FastSecAgg \citep{RN168}, $D_\textrm{max}>N/10$ may result in failure to recover the secret, and $D_\textrm{max}$ must also be a constant fixed in advance. The proposed HMA protocol can be executed successfully with security guarantees for any 
$D<D_\textrm{max}=N-1$, so the dropout tolerance is determined by the MKA protocol. Furthermore, as discussed in Section 4.1.1, MKA in our scheme incurs a much smaller effective dropout fraction, making the dropout condition
of SASH, $D=d_0 N < D_{\textrm{max}}$, easier to satisfy.

Apart from the solid dropout guarantee, the execution of SASH does not assume the existence of a Trusted Third Party (TTP). Also, there is no direct communication between clients, making our scheme easy to implement in the real world.

\subsection{Model Accuracy of the FL System}
We evaluate the impact of the noise produced by the almost 
homomorphic PRG on the model accuracy of the FL system. We implement three representative machine learning applications in FL, and perform plain aggregation and our proposed secure aggregation for each one. Our first application is a CNN model consisting of two convolutional layers with a total of about 0.2M parameters, trained over the FashionMNIST dataset \citep{mnist}. In another application, we train ResNet18 \citep{DBLP:journals/corr/HeZRS15} with 10M parameters on the CIFAR10 dataset \citep{2012Learning}. In the third application, we use Shakespeare dataset \citep{rnn} to train a customized LSTM \citep{hochreiter1997long} with 1.25M parameters. The three applications are based on different types of machine learning models of various sizes, and cover the learning task for image classification and text generation. The optimization approach for federated learning is the Federated Averaging algorithm \citep{mcmahan2017communication}. For plain FedAvg aggregation, the model updates are represented by real-valued vectors of 32 bits and uploaded for aggregation without encryption. 

For SASH, the model updates have two sources of error: (1) the model parameters are quantized into 16-bit integers before masking, and the corresponding dequantization is done after aggregation; (2) SHPRG introduces an error term into the aggregated model parameters. To measure the model quality, we track the test accuracy for CNN and ResNet18. Training loss is used for LSTM as the dataset is unlabelled and has no test set. As Figure~\ref{da_chutian} shows, on the one hand, SASH converges after the same number of training epochs as plain FedAvg, which means the speed of convergence is not affected. On the other hand, the models trained with SASH reach the same peak accuracy or minimum loss as plain FedAvg.

\begin{figure}[htbp]
	\centering
	\begin{subfigure}{0.325\linewidth}
		\centering
		\includegraphics[width=1\linewidth]{liu_405/Figure_a5.png}
		\caption{CNN}
		\label{chutian3}
	\end{subfigure}
	\centering
	\begin{subfigure}{0.325\linewidth}
		\centering
		\includegraphics[width=1\linewidth]{liu_405/Figure_2.png}
		\caption{ResNet}
		\label{res}% label used to reference this subfigure in the text
	\end{subfigure}
	\centering
	\begin{subfigure}{0.325\linewidth}
		\centering
		\includegraphics[width=1\linewidth]{liu_405/Figure_a3.png}
		\caption{LSTM}
		\label{lstm}% label used to reference this subfigure in the text
	\end{subfigure}
	\caption{The Quality of Trained Model}
	\label{da_chutian}
\end{figure}



\section{Conclusion}
This paper presents an efficient and practical secure aggregation scheme based on SHPRG. We demonstrate our scheme from the following aspects: (1) our scheme achieves 
better asymptotic computation costs than previous solutions, and improves the efficiency by up to $20\times$ over the baseline experimentally. (2) the proposed scheme is proved to provide adaptive security against the aggregator colluding with an arbitrary subset of clients. 
(3) our scheme is robust to worst-case dropouts and simple to implement in a standard Internet environment without assuming a TTP. (4) the trained model can obtain the same accuracy as with plain training. 

For future work, an extension of the scheme to cross-silo FL settings, and the Byzantine-robustness of the scheme can be investigated. 

\begin{acknowledgements} % will be removed in pdf for initial submission,
	% so you can already fill it to test with the
	% ‘accepted’ class option
	This paper is supported in part by the National Key Research and Development Program of China under grant No. 2020YFB1600201, the National Natural Science Foundation of China (NSFC) under grants No. U20A20202, 62090024, and 61876173, and the Youth Innovation Promotion Association CAS.
\end{acknowledgements}



\bibliography{liu_405}

\appendix


\end{document}
