\documentclass{midl} % Include author names
% \documentclass[anon]{midl} % Anonymized submission

% The following packages will be automatically loaded:
% jmlr, amsmath, amssymb, natbib, graphicx, url, algorithm2e
% ifoddpage, relsize and probably more
% make sure they are installed with your latex distribution

\usepackage{mwe} % to get dummy images
\usepackage{graphicx}
\usepackage{hyperref}
\usepackage{amsmath}
\usepackage{amsfonts} 
% \usepackage{subfigure}
\usepackage{float}  
\usepackage{multirow}
\usepackage{lipsum} 
\usepackage{enumitem}
\usepackage{algorithm}
\usepackage{booktabs}
\usepackage{xcolor}
\usepackage[compact]{titlesec}
\usepackage[noend]{algpseudocode}
% \usepackage[maxbibnames=10]{biblatex}
% \addbibresource{ref_bibtex.bib}

\expandafter\def\expandafter\normalsize\expandafter{%
    \normalsize%
    \setlength\abovedisplayskip{1pt}%
    \setlength\belowdisplayskip{6pt}%
    \setlength\abovedisplayshortskip{-6pt}%
    \setlength\belowdisplayshortskip{2pt}%
}

\jmlrvolume{-- 238}
\jmlryear{2024}
\jmlrworkshop{Full Paper -- MIDL 2024}
\editors{Accepted for publication at MIDL 2024}

\title[FedFDD: Federated Learning with Frequency Domain Decomposition for Low-Dose CT Denoising]{FedFDD: Federated Learning with Frequency Domain Decomposition for Low-Dose CT Denoising}

 % Use \Name{Author Name} to specify the name.
 % If the surname contains spaces, enclose the surname
 % in braces, e.g. \Name{John {Smith Jones}} similarly
 % if the name has a "von" part, e.g. \Name{Jane {de Winter}}.
 % If the first letter in the forenames is a diacritic
 % enclose the diacritic in braces, e.g. \Name{{\'E}louise Smith}

 % Two authors with the same address
 % \midlauthor{\Name{Author Name1} \Email{abc@sample.edu}\and
 %  \Name{Author Name2} \Email{xyz@sample.edu}\\
 %  \addr Address}

 % Three or more authors with the same address:
 % \midlauthor{\Name{Author Name1} \Email{an1@sample.edu}\\
 %  \Name{Author Name2} \Email{an2@sample.edu}\\
 %  \Name{Author Name3} \Email{an3@sample.edu}\\
 %  \addr Address}


% Authors with different addresses:
% \midlauthor{\Name{Author Name1} \Email{abc@sample.edu}\\
% \addr Address 1
% \AND
% \Name{Author Name2} \Email{xyz@sample.edu}\\
% \addr Address 2
% }

%\footnotetext[1]{Contributed equally}

% More complicate cases, e.g. with dual affiliations and joint authorship
\midlauthor{\Name{Xuhang Chen\nametag{$^{1,2}$}} \Email{xc369@cam.ac.uk}\\
% \addr $^{1}$ Brain Physics Laboratory, Division of Neurosurgery, Department of Clinical Neurosciences, University of Cambridge, Cambridge, UK \\
% \addr $^{2}$ Department of Electrical and Electronic Engineering \& I-X, Imperial College London, UK\AND
\Name{Zeju Li\nametag{$^{3}$}} \Email{zeju.li@ndcn.ox.ac.uk}\\
% \Name{Cheng Ouyang\nametag{$^{2}$}} \Email{c.ouyang@imperial.ac.uk}\\
\Name{Zikun Xu\nametag{$^{1}$}} \Email{zikun.xu22@imperial.ac.uk}\\
\Name{Kaijie Xu\nametag{$^{1}$}} \Email{nortrom@berkeley.edu}\\
\Name{Cheng Ouyang\nametag{$^{4,5}$}} \Email{c.ouyang@imperial.ac.uk}\\
\Name{Chen Qin\nametag{$^{1}$}} \Email{c.qin15@imperial.ac.uk}\\
\addr $^{1}$ Department of Electrical and Electronic Engineering $\&$ I-X, Imperial College London, UK\\
\addr $^{2}$  Department of Clinical Neurosciences, University of Cambridge, Cambridge, UK\\
\addr $^{3}$ Nuffield Department of Clinical Neurosciences, University of Oxford, UK\\
\addr $^{4}$ Institute of Clinical Sciences, Imperial College London, UK\\
\addr $^{5}$ Department of Engineering Science, University of Oxford, UK
}

\begin{document}

\maketitle

\begin{abstract}
Low-dose computed tomography (LDCT) enables imaging with minimal radiation exposure but typically results in noisy outputs. Deep learning algorithms have been emerging as popular tools for denoising LDCT images, where they typically rely on large data sets requiring data from multiple centers. However, LDCT images collected from different centers (clients) can present significant data heterogeneity, and the sharing of them between clients is also constrained by privacy regulations. In this work, we propose a personalized federated learning (FL) approach for enhancing model generalization across different organ images from multiple local clients while preserving data privacy. Empirically, we find that earlier FL methods tend to underperform single-set models on non-IID LDCT data due to the presence of data heterogeneity characterized by varying frequency patterns. To address this, we introduce a Federated Learning with Frequency Domain Decomposition (FedFDD) approach, which decomposes images into different frequency components and then updates high-frequency signals in an FL setting while preserving local low-frequency characteristics. Specifically, we leverage an adaptive frequency mask with discrete cosine transformation for the frequency domain decomposition. The proposed algorithm is evaluated on LDCT datasets of different organs and our experimental results show that FedFDD can surpass state-of-the-art FL methods as well as both localized and centralized models, especially on challenging LDCT denoising cases. 
Our code is available at \href{https://github.com/xuhang2019/FedFDD}{\texttt{https://github.com/xuhang2019/FedFDD}}.
%We believe FedFDD can bolster diagnostics and patient safety in LDCT with improved image quality, and also inspire the improvements of other image restoration algorithms in the federated learning environment.
\end{abstract}

\begin{keywords}
Federated Learning, Low-Dose CT Denoising, Discrete Cosine Transform.
\end{keywords}

\section{Introduction}
Federated learning (FL) is a decentralized approach that can integrate data from multiple local clients for training machine learning models in a privacy-preserving way~\cite{pati2022federated}. FL is particularly well-suited for medical imaging tasks that require extensive data, where privacy is a major concern. For example, FL has been discussed for applications including COVID-19 classification~\cite{dayan2021federated}, skin lesion classification \cite{yan2023labelefficient}, and human activity recognition~\cite{rieke2020future,rauniyar2023federated}. 


Machine learning models typically rely on large training data sets for achieving good generalization capability. Low-dose Computed Tomography (LDCT) denoising models have been shown to perform better by expanding the training dataset with samples from different sources and various anatomies~\cite{yang2023transfer, immonen2022use}. Nevertheless, a centralized training paradigm may not be always feasible, as local clients may be reluctant to share data due to privacy concerns. Hence, in this study, we propose to investigate a novel LDCT denoising method that can leverage data from diverse clients in a privacy-preserving way, with each personalized local model specializing in different anatomical regions. 
%For example, one client focuses on chest data, another on abdomen, and a third on head.

\noindent \textbf{Related works.} Fruitful research has been proposed in the LDCT denoising field. RED-CNN~\cite{chen2017lowdose} adopts residual skip connection and small convolution kernels to enable smooth variation on different layers of feature maps for LDCT denoising. Transformer-related works have also been proposed to utilize the pretrained data~\cite{jing2022training} and enrich the diversity and effectiveness of features~\cite{wang2023ctformer}. Similarly, adversarial learning methods~\cite{wolterink2017generative, han2022dualencodersingledecoder} have also been studied to learn the denoising task. However, few of them addressed the CT denoising issue in the FL settings. Current FL approaches mainly focus on addressing classification or segmentation challenges rather than tasks related to image restoration. FedAvg~\cite{mcmahan2017communicationefficient} proposed to iteratively average the model weights from local models following each local update step, assuming that all clients have independent and identically distributed (IID) data, which however is often not the case in real-world scenarios. To mitigate the sub-optimal performance caused by non-IID data, FedProx~\cite{li2020federateda} integrated a proximal term to the objective function to guide the local updates to align with the global model, thereby improving convergence with heterogeneous data. FedBN~\cite{li2021fedbna} was proposed to freeze the Batch Normalization layer when training the domain-shifted medical data for stabilizing the training and thus improving the averaging model. Besides, MOON~\cite{li2021modelcontrastive} has added a contrastive loss from the latent vector of each client to the global model, which aims to alleviate the global optimal parameter deviation issue by reducing the imbalanced gradient drift. 
% Besides, FedCross~\cite{xu2023federated} proposed to replace the aggregation step with the local model transmission to achieve stable performance in FL medical imaging segmentation. 


% More recently, processing of LDCT in FL settings has also raised attention whereas only a very few works exist. This mainly includes a personalized FL approach using localized hypernetwork of the CT scanning physical properties~\cite{yang2022hypernetworkbased} for the reconstruction of CT projection imaging, however, they did not consider the case of various anatomies. Here, our study reveals that the majority of current state-of-the-art FL algorithms can exhibit sub-optimal performance in the non-IID LDCT denoising tasks. This is potentially due to their predominant emphasis on representation learning for classification and segmentation tasks, neglecting the exploration of effective image restoration. 
%\textcolor{red}{
More recently, FL methods in low-level medical imaging tasks have also raised attention, such as in image reconstruction. For instance, FedMRI~\cite{feng2023specificitypreserving} has proposed to divide the reconstruction model into a global-shared encoder and separate local-preserved decoders with a weighted contrastive regularization, which has been shown to improve the efficiency and accuracy of MRI reconstruction tasks. FedPR~\cite{feng2023learning} further proposed a federated paradigm to only communicate the pre-trained-model-generated prompts and optimize them in an approximate null space of global prompts. Of particular relevance to our work, HyperFed~\cite{yang2022hypernetworkbased} has proposed to utilize localized hypernetworks based on simulated geometric parameters and dose levels to guide the CT reconstruction task, i.e., reconstructing the projection data to the imaging data. %\textcolor{red}{
In contrast, our work focuses on the LDCT denoising with various anatomies in an FL setting, and the design of our proposed model is inherently inspired by the denoising nature.

\noindent \textbf{Our contributions.} We propose a Federated Learning with Frequency Domain Decomposition (FedFDD) strategy for training the LDCT denoising model in a privacy-preserving way. FedFDD is motivated by the observation that LDCT images share common patterns across different anatomies, particularly in high-frequency components (\textit{i.e.,} noises). Therefore, we aim to leverage that and enhance the learning of the denoising task in the general high-frequency domain using an FL approach while preserving individual low-frequency components (\textit{i.e.,} semantic anatomical structures)~\cite{yang2022hypernetworkbased} for each local client.

Our contributions are mainly threefold: 1) We introduce a novel dual-path FL strategy with frequency domain decomposition to split the feature space for training and maintain gradient stability during model aggregation under non-IID conditions. 2) We propose to leverage data from different anatomies in LDCT denoising tasks, to mitigate data scarcity in FL settings. 3) We demonstrate that selectively updating high-frequency components in a dual setting significantly enhances the model's performance in LDCT denoising tasks, with competitive performance against state-of-the-art FL methods. Our proposed approach aligns with the nature of noise removal and indicates a promising direction in FL for imaging.

\section{Methods}

\subsection{Federated Learning Problem Formulation}

The main goal of our FL method is to utilize datasets from different clients in a privacy-preserving way. Specifically, we aim to construct a model that outperforms localized models, \textit{i.e.,} models trained on local datasets. In order to prevent privacy leakage, we follow the setting that data from different clients cannot be communicated (\textit{i.e.}, different hospitals do not allow patient data transmission). Given that there are $N$ clients with their own datasets $\mathcal{D}^{i}, i = 1,2, ... , N$ and the optimization loss function as $\mathcal{L}$, we want to achieve:
\begin{equation}
    \mathop{\arg\min}\limits_{\omega_{global}} (\sum_{k=1}^{N} p_k \mathcal{L} (\mathcal{D}^{k}; \omega_{global})),
\end{equation}
where $\omega_{global}$ denotes parameters of a global model, and $p_k$ is the weight of each local dataset, defined as $p_k = ||\mathcal{D}^{k} ||/ \sum_{i=1}^{N} ||\mathcal{D}^{i}||$, where $||\mathcal{D}^{k}||$ is the size of the $k$-th dataset.

Previous research~\cite{xu2023federated} mentioned that the conventional mini-batch gradient descent method aimed to update the model parameters at $j+1$ time step on the client $k$ by $\omega^{j+1}_{k} = \omega^{j}_{k} - \eta \nabla\mathcal{L} (\mathcal{D}^{k};  \omega^{j}_{k})$, where $\eta$ is the learning rate and $\nabla$ calculates the gradient w.r.t.\ $\omega^j_k$. In this way, the global model parameters can be aggregated by $\omega^{j+1}_{global} = \sum_{k=1}^{N} p_k \omega^{j+1}_{k} $. However, the non-IID property of $\mathcal{D}^{k}$ would generate varying directions of $\nabla\mathcal{L} (\mathcal{D}^{k}; \omega^{j}_{k} )$ and the weighted operation would therefore drift the global parameter to a sub-optimal solution~\cite{mcmahan2017communicationefficient}.

In our LDCT denoising setting with images of various anatomies from different clients, we observed that the mainstream FL scheme suffered from the drifted direction caused by data of \emph{different anatomical regions}. Therefore, we aim to alleviate the misleading impact of the varying anatomical structures on the denoising task and guide the model to learn the denoising essence. To achieve this, we propose to decompose the denoising task into two paths: one path consists of part of the model that updates the local parameters $\omega_{\text{anatomy}}$ corresponding to refining each anatomical structure, and the other specifically deals with the noise reduction part with model weights of $\omega_{\text{denoise}}$. In this way, the global model will only be updated with the gradient direction of the denoising part $\nabla\mathcal{L} (\mathcal{D}^{k}; \omega^{j}_{k,\text{denoise}} )$, without the negative influence from the aggregation of the drifted item $\nabla\mathcal{L} (\mathcal{D}^{k}; \omega^{j}_{k,\text{anatomy}})$. The model parameter at the client $k$ can then be viewed as $\omega_k = \omega_{k, \text{anatomy}} \cup \omega_{k, \text{denoise}}$. Therefore, the optimization problem can be formulated as: 
\begin{equation}
    \mathop{\arg\min}\limits_{\omega_{1}, \dots , \omega_{N}} (\sum_{k=1}^{N} p_k \mathcal{L} (\mathcal{D}^{k}; \omega_{k})).
\end{equation}
During the aggregation at time step $j+1$,  $\omega^{j+1}_{\text{denoise}}$ and $\omega^{j+1}_{k}$ are updated as $\omega^{j+1}_{\text{denoise}} = \sum_{i=1}^{N} p_i \omega^{j+1}_{i, \text{denoise}} $ and $\omega^{j+1}_{k} = \omega^{j+1}_{ \text{denoise}} \cup \omega^{j+1}_{k, \text{anatomy}} $.

\subsection{Federated Learning with Frequency Domain Decomposition}
Based on the above formulation, we propose a dual-path FL strategy for LDCT denoising, as shown in Fig.~\ref{fig-freq_example}. Noise in LDCT can be represented using various noise models including quantum noise from X-ray~\cite{yang2023transfer}, normally distributed stochastic process noise~\cite{li2023lowdose}, and speckle and streak noise~\cite{yang2020highfrequency}, which commonly correspond to the high-frequency component in the images. On the other hand, the anatomical structures can be represented by the low-frequency component of the images. Motivated by this, we propose to decompose the effects of anatomical structures and noises through frequency domain decomposition, thereby updating a global denoising model component with high-frequency data across different anatomies while preserving personalized local low-frequency model components for each client, as seen in Fig.~\ref{fig-freq_example}.

\begin{figure}[!t]
\centering
\includegraphics[width=0.75\columnwidth]{imgs/fedfdd_example_2.0.png}
\caption[The conceptual figure of the proposed FedFDD method.]{The conceptual diagram of the proposed personalized federated learning models with two frequency fusion methods. The LDCT images are split into different frequency components and the model will update high-frequency signals in an FL setting (the network branch colored in orange) while preserving local low-frequency characteristics (the network branch colored in blue). Both frequency components are then fused to form the output.  }
\label{fig-freq_example}
\end{figure}

\subsubsection{Frequency Decomposition with Adaptive Frequency Mask}
In detail, for the frequency domain decomposition, we propose to design a mask to decompose the image and utilize an FL strategy to learn the model parameters for the denoising part, \textit{i.e.,} $\omega_{ \text{denoise}}$.
Specifically, the Discrete Cosine Transformation (DCT) is adopted for the frequency decomposition due to its real-value property (Fig.~\ref{fig-mask_example}). The majority of the informative part of the image can then be efficiently condensed into a small number of coefficients with the DCT, especially in the low-frequency coefficients, distributed in the upper-left corner of the DCT frequency domain. The low-frequency part mainly includes semantic-related components of the images (\emph{i.e.,} the structure of anatomies),  and the noise is mainly distributed into the remainder area (Fig. \ref{fig-mask_example}).

Therefore, we use a Bernoulli binary mask to separate the semantic component (the low-frequency component) from the noisy area (the high-frequency component), inspired by~\cite{yue2021robust}. This is achieved by setting a low-frequency threshold $r_l$, within which coefficients are preserved (mask value 1), while those beyond it are determined by a Bernoulli distribution based on their normalized Euclidean distance $r_{u,v}$ to retain potentially useful high-frequency information. The mask can be formulated as:
\begin{equation}
M_{u,v}=\left\{
\begin{aligned}
1 & , & 0 \leq r_{u,v} < r_l \\
\text{Bernoulli}(r_{u,v}) & , & r_l \leq r_{u,v} \leq 1 
\end{aligned}
\right..
\end{equation}
With the defined binary adaptive frequency mask (Appendix \ref{appendix: mask-design}) $M$ and an input $X$, the low frequency image part $X_L$ and high frequency part $X_H$ can be derived as: $X_L= \mathcal{F}^{-1} (\mathcal{F}(X) \odot M )$, $X_H = \mathcal{F}^{-1} (\mathcal{F}(X) \odot (1-M))$. 
As both the DCT and the inverse DCT process are linear, we can have $X = X_L + X_H$, which allows for the linear fusion strategy. 

% \begin{equation}
% \left\{
% \begin{aligned}
% X_L& = & \mathcal{F}^{-1} (\mathcal{F}(X) \odot M ) \\
% X_H & = & \mathcal{F}^{-1} (\mathcal{F}(X) \odot (1-M) ) 
% \end{aligned}
% \right.
% \end{equation}

\begin{figure}[!t]
\centering
\includegraphics[width=0.9\columnwidth]{imgs/mask_4.0.png}
\caption[Adaptive frequency mask and the density distribution figures.]{Left: The image and frequency space of LDCT image and its two frequency components. Right: The intensity histogram of different images.}
\label{fig-mask_example}
\end{figure}


\subsubsection{The Overall Model Architecture}

 Fig.~\ref{fig-mask_example} demonstrates that the pixel intensity distribution of the low-frequency components (Low-f) of LDCT correlates well with the distribution of the NDCT image (\textit{i.e.,} clear, full-dose imaging), while high-frequency components (High-f) predominantly exhibit noise with an IID distribution. Motivated by this, we propose to tackle the non-IID distribution characteristics of the problem by decomposing the model into dual paths, where one network branch (parameterized by $\omega_{k, \text{anatomy}}$) is designed to preserve the anatomy locally and the other branch aims to tackle the noise part using FL, leveraging its effectiveness in dealing with IID data (\textit{i.e.,} high-frequency components in this case).

An illustration of the FedFDD model is shown in Fig.~\ref{fig-freq_example}. Both the low $f$ and high $f$ backbone models are inherited from the RED-CNN model \cite{chen2017lowdose}. The first convolutional layer outputs a single-channel feature map taking both the frequency component and the original LDCT image as input. This layer can be viewed as a feature fusion layer. The frequency component, exclusively treated as a residual item added to the output of the branch, is expected to drive the branch to learn the intrinsic features within each branch (\emph{i.e.,} anatomies for low frequency and denoising for high frequency). During the aggregation, the low-frequency path (the blue branch) is reserved for each local client, and the high-frequency path (the orange branch) is aggregated. The objective is defined by the Mean Squared Error (MSE) between the reconstructed image and the NDCT standard. The model is trained in an end-to-end way. 

%This architectural design endows each branch with the flexibility to assimilate significant denoising functionalities, fostering a synergistic learning environment.

\section{Experiments}

\subsection{Datasets and implementation details}

The LDCT and NDCT datasets~\cite{moen2021lowdose} are licensed by The Cancer Image Archive (TCIA) team. Original data is collected by the Mayo Clinic, containing the abdomen, chest, and head regions. We simulated 36 patients with different anatomies as three clients (\emph{Client 1: 12 Abdomen, Client 2: 12 Chest and Client 3: 12 Head}). We provide detailed data settings in Appendix~\ref{appendix:detailed-data}. We divide those datasets into $60\%$ (training), $10\%$ (validation), and $30\%$ (testing). The modified dual-path network is trained with an MSE loss with Adam optimizer for a total of 200 epochs. The learning rate is initialized as $10^{-4}$ and decayed per $3000$ iterations. We stop the training process early if the validation loss fails to decrease for 10 consecutive epochs~\cite{qian2024dpflaes}. The $r_l$ of the mask is $0.45$. The Hounsfield Units (HU) window of the CT images is $[-160,240]$ and the images are normalized to $[0, 1]$ by the minimum of $-1024$ and a maximum of $3072$~\cite{yang2022hypernetworkbased, bera2023self}. Patch training is adopted with patch size $64 \times 64$ and a total of $16$ patches per image following~\cite{bera2021noise}.
% \subsection{Experimental Design}

\subsection{Experimental Results and Discussion}
\subsubsection{Comparison Study}

\noindent \textbf{Limitation of current FL approaches.}
Contrary to expectations, state-of-the-art FL methods (e.g., FedAvg, FedProx, MOON, and FedBN) do not surpass localized or centralized training in LDCT denoising tasks, as evidenced by Table \ref{tab:comparison}. Typically, FL is expected to excel by leveraging diverse client data, but the non-IID nature of the LDCT dataset and notable domain shift (Fig.~\ref{fig-mask_example}) impede this advantage. Current FL approaches mostly do not address these specific challenges and thereby exhibit constrained performance in such scenarios. In contrast, centralized training benefits from comprehensive data exposure, facilitating superior generalization, whereas the effectiveness of localized training is curtailed by its limited dataset scope. This discrepancy is particularly marked in chest dataset comparisons, underscoring the limitations of existing FL approaches in handling non-IID distributions and domain variability.


\begin{figure}[!t]
\centering
\includegraphics[width=0.85\columnwidth]{imgs/deviation_figure_5.0.png}
\caption[Results of different federated learning algorithms.]{Top: Qualitative examples from different federated algorithms. Bottom: The error maps when compared with the ground truth. 
% c.f. Appendix~\ref{appendix:detailed-data} for more results.
}
\label{fig-fed_figure}
\end{figure}

\noindent \textbf{Comparison results.} We compare our proposed FedFDD with state-of-the-art FL algorithms in Table \ref{tab:comparison}. 
% \textcolor{red}{As HyperFed and FedPR approaches are not directly applicable to our task, we proposed to adapt them via introducing the client information as hyperparameters/prompts to inform the denoising process, named PromptFL.
% Detailed experimental settings are provided in Appendix \ref{appendix:baseline}.} 
Our method outperforms other methods in terms of PSNR and SSIM in most cases, particularly in the more challenging case of Chest CT denoising~\cite{mccollough2016tufg207a04}. Notably, FedFDD brings improvements of up to $7.2$ dB PSNR when compared with original LDCT images and $1.2$ dB compared with FedAvg on Chest data. Our experimental results indicate the effectiveness of the proposed method, which specifically considers the inherent challenges of LDCT denoising. Undesirable performance in the Head dataset could be attributed to the subtle discrepancy between LDCT and NDCT (Appendix \ref{appendix:detailed-data}) where all FL approaches do not outperform the localized training. Despite that, our approach can achieve a higher SSIM compared to baseline methods.  In contrast, other advanced FL algorithms underperform the localized training on all three datasets. 
% This underlines a crucial insight: the task of LDCT denoising is difficult under an FL setting and requires meticulous algorithmic design.
Furthermore, we visualize the denoised images and their corresponding error maps in Fig.~\ref{fig-fed_figure}. We find that the error produced by FedFDD is significantly less than that of others and the denoised image has well maintained the structural details and textures.

\begin{table}[!t]
\vspace{-15pt}
\centering
\caption{Quantitative results of the different methods. The best results are in \textbf{bold}.}
\label{tab:comparison}
\resizebox{0.8\columnwidth}{!}{
\begin{tabular}{c|cc|cc|cc}
\hline
\multirow{2}{*}{\textbf{Method}} & \multicolumn{2}{c|}{\textbf{Chest}} & \multicolumn{2}{c|}{\textbf{Abdomen}}  & \multicolumn{2}{c}{\textbf{Head}}\\
\cline{2-7}
& \textbf{PSNR$\uparrow$} & \textbf{SSIM$\uparrow$} & \textbf{PSNR$\uparrow$} & \textbf{SSIM$\uparrow$}   & \textbf{PSNR$\uparrow$} & \textbf{SSIM$\uparrow$}  \\
\hline
LDCT & 15.3388& 0.6857& 28.3165 &0.8213 &43.8707 & 0.9652 \\
Localized & 22.1312 & 0.7443 & 32.3983 & 0.8790 & \textbf{44.3414} & 0.9756 \\
Centralized & 22.4649 & 0.7534 & 32.4423 & 0.8809 & 43.6559 & \textbf{0.9816} \\
\hline
FedAvg & 21.3752 & 0.7538 & 31.8950 & 0.8735 & {43.1911} & 0.9788 \\
FedProx & 19.4455 & 0.7087 & 28.8216 & 0.8448 & 35.4972 & 0.9638 \\
MOON & 21.2922 & 0.7509 & 31.4486 & 0.8641 & 40.6419 & 0.9746 \\
FedBN & 21.8685 & 0.7472 & 31.8466 & 0.8716 & 42.2921 &0.9786 \\
FedMRI \footnotemark[1] & 22.1496 & 0.7452 & 32.2135 & 0.8792 & 42.5368 & 0.9788\\
PromptFL \footnotemark[2]  & 21.9730 & 0.7483 & 31.9425 & 0.8715 & 42.2195 & 0.9782 \\
FedFDD & \textbf{22.6209} & \textbf{0.7586} & \textbf{32.4510} & \textbf{0.8823} & 43.0421 & 0.9792\\
\hline
\end{tabular}
}
\end{table}
\footnotetext[1]{FedMRI~\cite{feng2023specificitypreserving} was not originally designed for LDCT denoising. We adapted it to our task for further substantiation. Details are provided in Appendix~\ref{appendix:baseline}.}
\footnotetext[2]{We adapt the idea of HyperFed~\cite{yang2022hypernetworkbased} and FedPR~\cite{feng2023learning} to our task, named as PromptFL. We utilize client-specific text embedding to guide the denoising process, as detailed in Appendix \ref{appendix:baseline}.}


% \textcolor{red}{The idea of Exp2 and Exp3 is from~\cite{yang2022hypernetworkbased,feng2023learning, feng2023specificitypreserving}. 

%\subsubsection{Federated Strategy On Our Model}
\subsubsection{Ablation study}



% \label{tab:fl_freq}
% \end{table}

\noindent \textbf{Effects of FL strategy in different frequencies.} Fig.~\ref{fig-rl_sensi} presents the experiments ascertaining the benefits of exclusively updating the high-frequency components. Here, $\text{FedFDD}_{all}$ represents a learning strategy that updates both the parameters of the dual-path model during aggregation and $\text{FedFDD}_{lf}$ means that we only update the low-frequency path with the high-frequency path frozen during aggregation.
Our proposed approach, i.e., $\text{FedFDD}_{hf}$, which only updates the high-frequency components, achieves the best performance across all metrics. This indicates that our strategy of focusing on high-frequency updates and freezing the low-frequency components is effective. In contrast, $\text{FedFDD}_{lf}$ approach shows a slight deterioration in performance compared with $\text{FedFDD}_{all}$. This is likely due to the reason that the merge of low-frequency components from different clients could introduce inconsistencies, leading to a potential loss of specific structural details and thereby affecting the overall image quality. The better performance of $\text{FedFDD}_{hf}$ compared with $\text{FedFDD}_{all}$ also indicates that the proposed targeted update strategy, focusing on high-frequency components, can be more effective than a holistic update.

\begin{figure}[!t]
\centering
\includegraphics[width=0.85\columnwidth]{imgs/ablation.png}
\caption[]{Ablation study. Left: Results of different aggregation strategies in frequency-division methods. Right: Sensitivity analysis of the threshold of the adaptive frequency mask $r_l$. Results are shown on Chest data.}
\label{fig-rl_sensi}
\end{figure}
	
\noindent \textbf{Effects of varying frequency split thresholds.} We show the model performance with varied thresholds $r_l$ in Fig.~\ref{fig-rl_sensi}. Recall that a higher $r_l$ corresponds to less information in the high-frequency images. When the threshold is low, the low-frequency component (intrinsic anatomical area) leaks into the high-frequency updating procedure, which results in a deterioration of the model performance. Besides, there is a slight peak around $0.4$ to $0.5$, suggesting that the optimal value lies within this range. When $ r_l > 0.6$, the performance slightly decreases because the model would benefit less from FL. 


\section{Conclusions}
In this study, we proposed the FedFDD model and demonstrated its effectiveness on the LDCT denoising task across different anatomical images. We proposed an FL strategy with frequency domain decomposition, where only the high-frequency components of the network are updated during the aggregation process. This ensures that the intrinsic characteristics of the low-frequency components are preserved locally. Our method achieves up to $1.2$ dB improvement compared with the state-of-the-art FL algorithms. Notably, it also outperforms both single-set and centralized training, particularly in more noisy scenarios (e.g., on Chest data). In the future, we plan to validate FedFDD in an out-of-federation setting for data from unseen clients.



% \begin{table}[htbp]
%  % The first argument is the label.
%  % The caption goes in the second argument, and the table contents
%  % go in the third argument.
% \floatconts
%   {tab:example}%
%   {\caption{An Example Table}}%
%   {\begin{tabular}{ll}
%   \bfseries Dataset & \bfseries Result\\
%   Data1 & 0.12345\\
%   Data2 & 0.67890\\
%   Data3 & 0.54321\\
%   Data4 & 0.09876
%   \end{tabular}}
% \end{table}

% \begin{figure}[htbp]
%  % Caption and label go in the first argument and the figure contents
%  % go in the second argument
% \floatconts
%   {fig:example}
%   {\caption{Example Image}}
%   {\includegraphics[width=0.5\linewidth]{example-image}}
% \end{figure}

% 


% Acknowledgments---Will not appear in anonymized version
% \midlacknowledgments{We thank a bunch of people.}

% References
\bibliographystyle{plain}
\bibliography{midl24_238}
% \printbibliography
\clearpage
\appendix
\section{Mask Design}
\label{appendix: mask-design}

Assuming the image size is $H\times W$, the normalized Euclidean distance of a pixel $(u,v)$ is defined as
\[
r_{u,v} = \frac{\sqrt{u^2+v^2}}{\sqrt{(H-1)^2 + (W-1)^2}}.
\]

$M_{u,v}$ represents the value at the point $(u,v)$ in the binary mask (\emph{i.e.,} $M_{u,v} \in \{0,1\}$).


\begin{figure}[htb]
\centering
\includegraphics[width=0.5\columnwidth]{imgs/mask illustration.png}
\caption[Illustration of the mask.]{A detailed illustration of the mask.}
\label{fig-mask}
\end{figure}



\section{Detailed Data Information}
\label{appendix:detailed-data}

%As the conventional dataset in the LDCT field~\cite{mccollough2016tufg207a04} only contains 10 patients' LDCT-NDCT image pairs, to ensure the performance, we randomly selected 12 patients in each anatomy. 
To ensure the performance, we randomly selected 12 patients for each anatomy. The patient numbers below are the identifiers of the patients in the database. Each client's data contains CT images from both SOMATOM Definition AS+ and SOMATOM Definition Flash scanners.

Client 1: Selected patient numbers: L143, C004, C012, C027, C030, C050, C067, C002, C016, C021, C052, L506. Each patient has approximately 200 images on average.

Client 2: Selected patient numbers: L067, L096, L192, L286, L310, L033, L049, L056, L109, L291, L014, L019. Each patient has approximately 320 images on average.

Client 3: Selected patient numbers: N012, N024, N030, N047, N051, N053, N072, N076, N079, N082, N085, N100. Each patient has approximately 38 images on average.

Example images from each client are shown in Fig.~\ref{fig-dataset_samples}.

\begin{figure}[htb]
\centering
\includegraphics[width=1\columnwidth]{imgs/dataset_samples.png}
\caption[Data samples in different clients.]{Samples in the dataset from three body parts (clients).}
\label{fig-dataset_samples}
\end{figure}


\begin{figure}[htb]
\centering
\includegraphics[width=1.0\columnwidth]{imgs/abdomen_heads_3.0.png}
\caption[]{The denoising results of the state-of-the-art FL algorithms with their error maps on Abdomen and Head.}
\label{fig-ab-head}
\end{figure}



\clearpage


\section{Generalizability Study}
\label{appendix:experiment-design}

% To compare with cohorts' state-of-the-art studies in different areas, such as CT reconstruction and MRI reconstruction, we present the detailed experimental settings.

To validate our approach's generalizability to FL scenarios where each client may contain data with multiple anatomies, we further perform a client generalization test. Specifically, we construct the training sets as listed below, where client 1 contains data from both the chest and abdomen, and client 2 and client 3 contain data from the abdomen and head, respectively. It is important to mention that we keep the test set consistent with the original data setting. Consequently, the results are directly comparable to those in Table~\ref{tab:comparison}.

% \noindent \textbf{client Generalization Test}
\begin{itemize}
\label{tab:client}
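    \item Client 1: Chest + Abdomen 1 (patient numbers: L219, L014, L019) % NOTE(review): L219 does not appear in the abdomen patient list of the Detailed Data Information appendix (which lists L291) -- confirm the identifier
    \item Client 2: Abdomen 2 (patient numbers: L067, L096, L192, L286, L310, L033, L049, L056, L109)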
    \item Client 3: Head
\end{itemize}

% We name this new setting as data setting 2, compared with the original data setting as in the main manuscript (data setting 1). 
We compare our proposed method against three representative FL approaches (as seen in Table~\ref{tab:comparison}) with the new data setting, with the results shown in Table~\ref{tab:fl_freq}. It is observed that in the new data setting where the client contains data with multiple anatomies, our proposed approach FedFDD can still achieve an overall better performance compared to other competing methods.
% which shows comparable performance compared to the original data setting. 
This indicates the good generalizability of our method in alternative data scenarios.



\begin{table}[H]
\caption{Client generalization comparison experiment in the new data setting. }
\centering
\begin{tabular}{c|cc|cc|cc}
\hline
\multirow{2}{*}{\textbf{Method}} & \multicolumn{2}{c|}{\textbf{Chest}} & \multicolumn{2}{c|}{\textbf{Abdomen}}  & \multicolumn{2}{c}{\textbf{Head}}\\
\cline{2-7}
& \textbf{PSNR$\uparrow$} & \textbf{SSIM$\uparrow$} & \textbf{PSNR$\uparrow$} & \textbf{SSIM$\uparrow$}   & \textbf{PSNR$\uparrow$} & \textbf{SSIM$\uparrow$}  \\
\hline
LDCT & 15.3388& 0.6857& 28.3165 &0.8213 &43.8707 & 0.9652 \\
Localized & 22.0033 & 0.7445 & 32.1983 & 0.8776 & \textbf{44.3414} & 0.9756 \\
% Localized 2 & 22.0033 & 0.7445 & 31.8583 & 0.8754 & \textbf{44.3414} & 0.9756 \\
Centralized & 22.4649 & 0.7534 & 32.4423 & 0.8809 & 43.6559 & \textbf{0.9816} \\
\hline
FedAvg & 21.5235 & 0.7492 & 31.6691 & 0.8733 & 43.1827 & 0.9767 \\
FedBN  & 21.8890 & 0.7495 & 31.6205 & 0.8701 & 42.2720 & 0.9786 \\
FedMRI  & 22.2771 & 0.7499 & 32.2095 & 0.8780 & 42.5368 & 0.9788 \\
FedFDD  & \textbf{22.6056} & \textbf{0.7581}& \textbf{32.4497} & \textbf{0.8821} & 43.0423 & 0.9791 \\
% \textbf{FedFDD (data setting 1)} & \textbf{22.6209} & \textbf{0.7586} & \textbf{32.4510} & \textbf{0.8823} & 43.0421 & 0.9792\\ 
\hline

\end{tabular}

\label{tab:fl_freq}
\end{table}




\section{Baseline Method Details}
\label{appendix:baseline}

Here we introduce how we adapt the FedMRI~\cite{feng2023specificitypreserving}, HyperFed~\cite{yang2022hypernetworkbased} and FedPR~\cite{feng2023learning} approaches to our task, as they are not designed for the LDCT denoising task and cannot be directly used for this purpose. However, since they are also proposed for low-level image reconstruction tasks, comparisons against them can further strengthen the evaluation of our proposed method.


\noindent \textbf{FedMRI: Specificity-Preservation} \cite{feng2023specificitypreserving}:

We adapted the idea of freezing the decoder (client-specific) and globally sharing the encoder as in the FedMRI approach to our task. Specifically, we built this on the base network architecture, i.e., RED-CNN, which comprises an encoder and decoder. During the FL training, we froze each client's decoder and enabled the communication of the encoder. Ultimately, each client has its own decoder and a universal encoder.



\noindent \textbf{PromptFL: Hyperparameter Prompts} \cite{yang2022hypernetworkbased, feng2023learning}:


As discussed in Related Works, HyperFed~\cite{yang2022hypernetworkbased} proposed a personalized FL approach using localized hypernetwork of the CT scanning physical properties for the reconstruction of CT projection imaging. However, our task mainly focuses on LDCT denoising, where we cannot access the physical parameters of the imaging, which therefore limits us from using that for the hypernetwork training. On the other hand, FedPR~\cite{feng2023learning} used visual prompts in the null space of global prompt for the FL paradigm, which is not directly feasible in our case.

However, inspired by both HyperFed and FedPR, we propose to adapt them to our task by introducing the client-specific information as hyperparameters/prompts in the task. Specifically, we proposed to introduce the CLIP representation vector of our client information as prompts to inform the denoising process. The CLIP vector of \texttt{"This is an image of \{anatomy\}  low dose CT"} (anatomy can be \texttt{chest}, \texttt{abdomen} or \texttt{head}) is projected into a 96-dim vector, in line with the number of channels of the network bottleneck. Then the bottleneck feature maps are weighted with the softmaxed prompt vector in the channel dimension. The whole operation can be regarded as an attention mechanism prioritizing the specific channels based on the prompts. The denoising network was then trained with the guided information from the prompts.








\end{document}

% \begin{algorithm}[H]
% \caption{Frequency-Division Federated Learning} \label{alg:fdfl}
% \begin{algorithmic}
% % \Procedure{Server Execution}{$\omega_0$}
%     % \State Initialise the global model with random parameter $\omega_0$ \\
%     % \For{each round $e_g = 1$ \textbf{to} $E_g$} 
%     % \State In each client $i = 1$,  $\omega_{t}^{i} \leftarrow \omega_{0}$ \Comment{Initialise local model with global parameters}
%     % \State In each client $i = 1$, $\omega_{t+1}^{i} \leftarrow \textbf{LocalUpdate}(i, \omega_{t}^{i})$
%     % \State $\omega_{0} \leftarrow \textbf{Aggregate}(\{\omega_{t+1}^{1}, \omega_{t+1}^{2}, \ldots, \omega_{t+1}^{N}\})$ \Comment{Aggregate local updates}
%     % \EndFor
% % \EndProcedure
% \Function{LocalUpdate}{$i$, $\omega$}
%     \State $\mathcal{B} \leftarrow$ (split $\mathcal{D}^{i}$ into batches of size $B$)
%     \For{each local epoch $e_l = 1$ to $E_l$} % Assuming E_l local epochs
%         % \For{batch $b \in \mathcal{B}$}
%             \State Freeze all the layers in the $HighFreq$ path
%             \State Update $\omega$ with batch $b$ \Comment{Apply some update rule here}
%         % \EndFor
%     \EndFor
%     \Return $\omega$ to server
% \EndFunction
% \end{algorithmic}
% \end{algorithm}


% \begin{algorithm}[H]
% \caption{Frequency-Division Federated Learning} \label{alg:fdfl}
% \begin{algorithmic}
% \Procedure{Server Execution}{$\omega_0$}
%     \State Initialise the global model with random parameter $\omega_0$ \\
%     \For{each round $e_g = 1$ \textbf{to} $E_g$} 
%         \State In each client $i = 1$,  $\omega_{t}^{i} \leftarrow \omega_{0}$ \Comment{Initialise local model with global parameters}
%         \State In each client $i = 1$, $\omega_{t+1}^{i} \leftarrow \textbf{LocalUpdate}(i, \omega_{t}^{i})$
%         \State $\omega_{0} \leftarrow \textbf{Aggregate}(\{\omega_{t+1}^{1}, \omega_{t+1}^{2}, \ldots, \omega_{t+1}^{N}\})$ \Comment{Aggregate local updates}
%     \EndFor
% \EndProcedure
% \Function{LocalUpdate}{$i$, $\omega$}
%     \State $\mathcal{B} \leftarrow$ (split $\mathcal{D}^{i}$ into batches of size $B$)
%     \For{each local epoch $e_l = 1$ to $E_l$} % Assuming E_l local epochs
%         \For{batch $b \in \mathcal{B}$}
%             \State Freeze all the layers in the $HighFreq$ path
%             \State Update $\omega$ with batch $b$ \Comment{Apply some update rule here}
%         \EndFor
%     \EndFor
%     \Return $\omega$ to server
% \EndFunction
% \end{algorithmic}
% \end{algorithm}

%\subsubsection{Observation: Failure of Federated Learning in LDCT}
% \begin{table}[!t]
% \centering
% \begin{tabular}{lccc}
%     \toprule
%     \textbf{Method} & \textbf{PSNR$\uparrow$}    & \textbf{SSIM$\uparrow$}   & \textbf{RMSE$\downarrow$}    \\
%     \midrule
%     Localised-Chest                     & 22.1312          & 0.7443          & 33.9724          \\
%     Centralised-Chest          & 22.4649 & 0.7534 & 33.2149 \\
%     FedAvg-Chest                        & 21.3752          & 0.7538          & 37.9855          \\

%     \midrule
%     Localised-Abdomen                   & 32.3983          & 0.8790          & 9.8616           \\
%     Centralised-Abdomen        & 32.4423 & 0.8809 & 9.8256  \\
%     FedAvg-Abdomen                      & 31.8950          & 0.8735          & 10.4140          \\
%     \midrule
%     Localised-Head                      & 44.3414          & 0.9756          & 2.8113           \\
%     Centralised-Head           & 43.6559 & 0.9816 & 2.8671  \\
%     FedAvg-Head                         & 42.1911          & 0.9788          & 3.4214          \\
%     \bottomrule
% \end{tabular}
% \caption{Domain shift issue for FL in LDCT denoising: naive FL underperforms localized and centralized training due to domain shift among clients.}    
% \label{tab:baseline_table}
% \end{table}
