\section{Background}\label{sec:sec2}
% This section provides background material on inverse problems with a focus on our considered medical imaging inverse problems, Accelerated MRI Reconstruction, Cone-beam CT, and Fan-beam CT.

\subsection{Inverse problems}
\label{sec:sec2.1}
Mathematically, addressing an abstract inverse problem typically involves solving an equation expressed as
% 
\begin{equation}
    y = \mathcal A (x_{\text{true}}) + \varepsilon,
    \label{eq:forward_model}
\end{equation}
% 
where $x_{\text{true}} \in X$ represents the true model parameters that we aim to estimate, $y \in Y$ is the observed data, $\mathcal A: X \to Y$ denotes the observation operator, and $\varepsilon$ signifies the observation noise. This equation, also referred to as the forward model, establishes the relationship between  $x_{\text{true}}$ and $y$. A solution to \eqref{eq:forward_model} can be approached by explicitly minimizing the negative data log-likelihood $\mathcal L$ to identify the maximum likelihood estimator. This entails estimating $x_{\text{true}}$ by
% 
\begin{equation}
    \hat x :=  \argmin_{x \in X} \mathcal L(\mathcal A(x), y).
    \label{eq:neg_log_likelihood}
\end{equation}
% 
\noindent
However, in the case of ill-posed inverse problems or when noise is present, this can lead to overfitting to the noise. To prevent this, variational regularization can be applied, estimating $x_{\text{true}}$ as
%
\begin{equation}
    \label{eq.varmethod}
    \hat x_{\lambda} :=  \argmin_{x \in X} \left( \mathcal L(\mathcal A(x), y) + \lambda \mathcal G(x) \right),
\end{equation}
% 
\noindent
where $\mathcal G: X \to \mathbb R$ denotes a regularization functional describing prior knowledge about $x_{\text{true}}$ such as smoothness or sparsity, and $\lambda > 0$ is the regularization parameter. For many classical inverse problems arising in image reconstruction, iterative methods exist that allow \eqref{eq.varmethod} to be solved approximately, while different strategies have been proposed for selecting an appropriate value of $\lambda$. For instance, the Morozov discrepancy principle~\cite{Morozov1966, Kaipio2005} assumes a tight bound on the data misfit at the true solution, $\mathcal L(\mathcal A(x_{\text{true}}), y) \leq \epsilon$, and then selects $\lambda$ such that $\mathcal L(\mathcal A(\hat x_{\lambda}), y) \approx \epsilon$. It is important to note that the choice of $\lambda$ and the regularization functional $\mathcal{G}$ can influence the parameters of the iterative scheme used to solve \eqref{eq.varmethod}, such as the step size or total iteration count.

\subsection{Accelerated MRI Reconstruction}\label{sec:sec2.2}
% \subsubsection{Problem Formulation}\label{sec:sec2.2.1}
% ~\\
In Accelerated MRI Reconstruction, the goal is to reconstruct an image $x \in \mathbb{C}^{n}$ from sparsely sampled multi-coil ($n_c > 1$) $k$-space data $\tilde{y} \in \mathbb{C}^{n \times n_{c}}$. This process relies on knowledge of the coil sensitivities $\vec{S} = (\vec{S}_{1}, \ldots, \vec{S}_{n_c}) \in \mathbb{C}^{n \times n_{c}}$, which reflect each coil's spatial sensitivity. The forward model is described by a linear operator $\mathcal{A}_{\Theta,\vec{S}}:\mathbb{C}^{n} \rightarrow \mathbb{C}^{n \times n_c}$, defined as
%
\begin{equation}
    \tilde{y} = \mathcal{A}_{\Theta,\vec{S}}(x) := \vec{U}_{\Theta} \circ \mathcal{F} \circ \mathcal{E}_{\vec{S}}(x),
    \label{eq:forward_operator}
\end{equation}

\noindent
combining the undersampling operator $\vec{U}_{\Theta}$, the two-dimensional Fourier transform $\mathcal{F}$ (computed with the fast Fourier transform, FFT), and the coil-encoding operator $\mathcal{E}_{\vec{S}}:\mathbb{C}^{n} \rightarrow \mathbb{C}^{n \times n_c}$, which maps an image to individual coil images using $\vec{S}$. Details on $\vec{U}_{\Theta}$ are provided in \Appendix{appendix1-mri-subsampling}.

% The undersampling operator $\vec{U}_{\Theta}$ acquires samples as determined by  $\Theta \subset \Omega = \{1,\cdots,n\}$ and sets other to zero:
% % 
% \begin{equation}
%     \left(\vec{U}_{\Theta}(w)\right)_i =
%         \begin{cases} 
%         w_i & \text{if } i \in \Theta \\
%         0 & \text{if } i \notin \Theta
%         \end{cases}
%         , \quad i = 1, \cdots, n.
% \end{equation}
% % 
% \noindent
% The acceleration factor $R = \frac{|\Omega|}{|\Theta|} = \frac{n}{|\Theta|}$ is inversely proportional to the number of samples acquired, meaning fewer samples result in higher acceleration but lower image quality. Sensitivity maps are typically derived by fully sampling a segment of the central region of the $k$-space, known as the autocalibration signal (ACS), ${\Theta_{\text{acs}}} \subset \Theta$. The ratio of the $k$-space sampled for ACS is given by $r_\text{acs}:= \frac{|{\Theta_{\text{acs}}}|}{n}$.

% \subsubsection{Iterative ADMM DL-based Accelerated MRI Reconstruction}
% \label{sec:sec2.2.2}

% A wide range of deep learning approaches have been proposed for accelerated MRI, with many relying on unrolled iterative schemes that embed the acquisition physics within a learned optimization procedure. Examples include gradient-descent unrolling in either image or frequency domains \cite{Hammernik2017,Lnning2019,Sriram2020,Yiasemis2022b} and first-order methods based on proximal gradient \cite{Luo2023}, conjugate gradient \cite{Kim2022}, or ADMM \cite{10.1007/978-3-031-52448-6_45}.

% In our experiments, we adopt an ADMM-based unrolled reconstruction framework in which each iteration alternates a data-consistency update with a learned denoising block. All convolutional and transposed-convolutional layers within the learnable components are replaced by the proposed modulated convolutions, enabling the network to adjust its behaviour according to the acquisition parameters of each sample.

% The full set of update equations, initialization strategy, sensitivity-map refinement module, and network architectures follow the vSHARP formulation \cite{yiasemis2023vsharp}. Complete mathematical details and implementation specifics are provided in \Appendix{appendix1-vsharp}.


% \subsubsection{Iterative ADMM DL-based Accelerated MRI Reconstruction}\label{sec:sec2.2.2}
% ~\\
% A plethora of approaches have emerged in the recent years aimed at reconstructing undersampled $k$-space data incorporating DL convolutional-based models, focusing largely on physics-based, unrolled iterative methods.  Such instances include first order gradient descent optimization unrolling schemes in the image \cite{Hammernik2017,Lnning2019} or frequency domain \cite{Sriram2020,Yiasemis2022b}. Other first order variants include methods utilizing algorithms such as proximal gradient \cite{Luo2023}, conjugate gradient \cite{Kim2022} or alternate direction of multipliers method (ADMM) \cite{yiasemis2023vsharp,10.1007/978-3-031-52448-6_45}. 

% For our experiments in Accelerated MRI Reconstruction we employ a DL-based algorithm that exploits variable half-quadratic splitting followed by ADMM unrolled optimization spanning $J$ iterations, namely vSHARP (variable Splitting Half-quadratic ADMM algorithm for Reconstruction of inverse-Problems). Given undersampled $k$-space measurements $\tilde{y}$, and sensitivity maps $\mat{S}$,    each unrolled iteration comprises the following steps:
% % 
% \begin{subequations}
% \begin{gather}
%     x^{(j)} = \argmin_{x\in\mathbb{C}^{n}} \frac{1}{2} \sum_{k=1}^{n_c}\left|\left| \mathcal{A}_{\Theta, \mat{S}^{k}}^{k}(x) - \Tilde{y}^{k}\right|\right|_2^2 \\ + 
%     \frac{\eta_j}{2} \big | \big | x - w^{(j-1)} + \frac{\vec{u}^{(j-1)}}{\eta_j} \big | \big |_2^2, \nonumber
% \label{eq:admm_x} \\
%     w^{(j)} =  \mathcal{D}_{\boldsymbol{\phi}_j} (x^{(j)}, w^{(j-1)}, \frac{\vec{u}^{(j-1)}}{\eta_j})
% \label{eq:admm_w}\\
%     \vec{u}^{(j)} = \vec{u}^{(j-1)} + \eta_j (x^{(j)} - w^{(j)}), \quad j=1,\cdots, J.
% \label{eq:admm_u}
% \end{gather}
% \label{eq:admm_vsharp}
% \end{subequations}
% \noindent
% vSHARP solves \eqref{eq:admm_x} via an iterative differentiable gradient scheme, while \eqref{eq:admm_w} is learned using trainable convolutional-based denoising modules $ \mathcal{D}_{\boldsymbol{\phi}_j}$. Initial estimations of each variable is obtained as follows:
% % 
% \begin{equation}
%     x^{(0)} = \vec{z}^{(0)} := \sum_{k=1}^{n_c} \vec{S}_{k}^{*}\mathcal{F}^{-1} (\tilde{y}) , \quad \vec{u}^{(0)} = \mathcal{U}_{\boldsymbol{\phi}_u} (x^{(0)}),
% \label{eq:admm_inits}
% \end{equation}
% % 
% where $\mathcal{U}_{\boldsymbol{\phi}_u}$ represents a DL-based initializer comprising alternating sequences of dilated convolutions and replication padding responsible for predicting suitable initial value for the Lagrange Multiplier step in \eqref{eq:admm_u}. For further details refer to the original work \cite{yiasemis2023vsharp}.

% For the prediction of the sensitivity maps $\vec{S}$, vSHARP also employs a separate DL convolutional-based model, denoted as $\mathcal{S}_{\boldsymbol{\phi}_S}$, which takes as input estimated sensitivities $\tilde{\vec{S}}$ using ACS-sampled $k$-space data (see \cite{Yiasemis2022b} for more details on initial estimation) and refines them during training:
% % 
% \begin{equation}
%     \mat{S}_k = \mathcal{S}_{\boldsymbol{\phi}_S} (\tilde{\mat{S}}_k), \quad k=1,\cdots, n_c.
% \end{equation}
% % 
% Concerning the architecture of the denoising models $\{\mathcal{D}_{\boldsymbol{\phi}_j}\}_{j=1}^{J}$ and sensitivity module $\mathcal{S}_{\boldsymbol{\phi}_S}$, we opted for the 2D U-Net architecture \cite{ronneberger2015u}, which combines an encoder (2D convolutions and 2D max pooling), and a decoder (2D transpose convolutions) with skip connections.


\subsection{Cone-beam CT} \label{sec:sec2.3}
% \subsubsection{Problem Formulation} \label{sec:sec2.3.1}
% ~\\
Cone-beam CT reconstruction seeks to recover the spatially varying X-ray attenuation coefficient $x \colon X \to \mathbb{R}_{\geq 0}$ on a three-dimensional domain $X \subset \mathbb{R}^3$ from noisy projection measurements $y$. For a monochromatic X-ray source, the forward model is given by the projection operator $\mathcal{P}$ (defined in \Appendix{appendix1-cbct-geometry}), which integrates $x$ along rays from the source to the detector elements. Assuming Poisson-distributed photon counts whose mean follows the Beer--Lambert law, the data acquisition is described by
% 
\begin{equation}\label{eq.noisemodel}
    y = \text{\texttt{Poisson}}(I_0 \cdot e^{-\mathcal P x}),
\end{equation}
% 
where $I_0$ denotes the unattenuated photon count. Higher photon and projection counts reduce image noise but increase radiation dose. The reconstruction problem is to recover $x$ given $y$.



\subsection{Fan-beam CT} \label{sec:sec2.4}
% \subsubsection{Problem Formulation}\label{sec:sec2.4.1}
% ~\\
Fan-beam CT is the two-dimensional analogue of cone-beam CT (CBCT), where the domain is $X \subset \mathbb{R}^2$ and the projection operator $\mathcal{P}$ is defined by line integrals along rays in the imaging plane. The full geometric description of the source trajectory, detector parametrization, and projection mapping is provided in \Appendix{appendix1-fanbeam-geometry}. The noisy acquisition model follows~\eqref{eq.noisemodel}.





% \subsection{Cone-beam CT} \label{sec:sec2.3}
% \subsubsection{Problem Formulation} \label{sec:sec2.3.1}
% ~\\
% Mathematically, the problem of Cone-beam CT (CBCT) reconstruction can be formulated in the following way. For photons with a given fixed X-ray energy, we denote the corresponding tissue attenuation coefficient at a point $z \in X \subset \mathbb R^3$ by $x(z) \in \mathbb R_{\geq 0}$, where $X$ is a three-dimensional domain. In general, tissue attenuation coefficients depend on the energy level, but we assume a monochromatic X-ray source for the purposes of this paper. The X-ray source rotates around a patient following a circular orbit, which is parameterized as a curve $\gamma: [0,1] \to \mathbb R^3$. Detector position and orientation are specified as a family of planes $Y: t \mapsto Y(t)$ for $t \in [0,1]$, where each such plane is canonically identified with $\mathbb R^2$. We let $l_{t, u}$ be the line from the source position $\gamma(t)$ at time step $t \in [0, 1]$ to the detector element $u \in Y(t)$. This allows us to define the \textit{projection operator} as
% % 
% \begin{equation}
%     \label{eq.projector}
%     \mathcal P(x)(t, u) = \int_{l_{t, u}} x(z) dz, 
% \end{equation}
% % 
% therefore, $\mathcal P$ is a linear operator mapping functions defined on three-dimensional domain $X$ to functions defined on $[0,1] \times \mathbb R^2$. Hermitian adjoint $\mathcal P^*$ of $\mathcal P$ is called the \textit{backprojection operator}. Following Beer-Lambert law, noisy data acquisition process in the absence of scatter can be modeled as
% \begin{equation}\label{eq.noisemodel}
% y = \text{\texttt{Poisson}}(I_0 \cdot e^{-\mathcal P x}),
% \end{equation}
% where $I_0$ is the unattenuated X-ray photon count. Higher photon count results in lower image noise, but at the same time increases radiation exposure. The goal of the reconstruction algorithm is to retrieve the tissue attenuation coefficients $x$ given the noisy projection data $y$.

% \subsubsection{$\partial$U-net}
% ~\\
% As a baseline learned iterative scheme for fast and memory-efficient Cone-beam CT reconstruction, we used $\partial$U-net \cite{Hauptmann2020}, which is a multi-scale learned iterative scheme operating in image domain only at four different resolution scales of $1, \frac 1 2, \frac 1 4$ and $\frac 1 8$ starting at the lowest resolution. The network blocks operating at reduced resolutions are small convolutional neural networks consisting of $3$ convolutional layers with ReLU activations and normalization layers, while the network block operating at full resolution is a 3d U-net that combines the intermediate reconstructions to obtain the final image. FDK reconstruction with ramp filter and frequency cut-off at $95\%$ was provided as the initial reconstruction to the network.

% \subsection{Fan-beam CT} \label{sec:sec2.4}
% \subsubsection{Problem Formulation}\label{sec:sec2.4.1}
% ~\\
% Fan-beam CT can be seen as a `two-dimensional version' of CBCT, where the domain $X \subset \mathbb R^2$ is two dimensional, source trajectory is a circular orbit $\gamma: [0,1] \to \mathbb R^2$ and detector position and orientation are parameterized as a family of lines $Y: t \mapsto Y(t)$ for $t \in [0,1]$. $l_{t, u}$ is the line from the source position $\gamma(t)$ at time step $t \in [0, 1]$ to the detector element $u \in Y(t)$. This notation allows us to define the \textit{projection operator} $\mathcal P$ via \eqref{eq.projector},  $\mathcal P$ now being a linear operator mapping functions defined on two-dimensional domain $X$ to functions defined on $[0,1] \times \mathbb R$. Fan-beam CT noisy acquisition process is modeled using \eqref{eq.noisemodel}.


% \subsubsection{Learned Primal-Dual reconstruction}\label{sec:sec2.4.2}
% ~\\
% As a baseline learned iterative scheme for CT reconstruction, we used the Learned Primal-Dual algorithm (LPD) \cite{Adler2017b}. LPD is a learned iterative scheme inspired by the Primal-Dual Hybrid Gradient method~\cite{Chambolle2011}, which, unlike $\partial$U-net, makes use of both image-space and projection-space operations in an end-to-end trainable network. Image-space computations are performed by \emph{primal blocks} and projection-space computations are performed by \emph{dual blocks}, all primal/dual blocks being small convolutional neural networks with $3$ convolutional layers, parametric ReLU (PReLU) activation functions and batch normalization layers. To connect primal and dual blocks, projection and backprojection operators are used. Unlike $\partial$U-net, LPD is not designed for optimal memory efficiency and cannot be applied to CBCT directly due to memory limitations.

