\documentclass{midl} % Include author names

% The following packages will be automatically loaded:
% jmlr, amsmath, amssymb, natbib, graphicx, url, algorithm2e
% ifoddpage, relsize and probably more
% make sure they are installed with your latex distribution

\usepackage{mwe} % to get dummy images
\usepackage{booktabs}
\usepackage{float}
\usepackage{multirow}
\usepackage{graphicx}
\usepackage{makecell, romannum, amssymb, amsmath, ccaption, setspace, algorithm, algpseudocode, enumitem, pifont, etoolbox, fontawesome5, longtable, array, arydshln, bbding}
\jmlrvolume{-- 293}
\jmlryear{2026}
\jmlrworkshop{Full Paper -- MIDL 2026}
\editors{Accepted for publication at MIDL 2026}

\title[Adversarial CT Report Generation]{No Evidence of Disease: Clinically-Risky Adversarial Chest CT Report Generation}

 % Use \Name{Author Name} to specify the name.
 % If the surname contains spaces, enclose the surname
 % in braces, e.g. \Name{John {Smith Jones}} similarly
 % if the name has a "von" part, e.g \Name{Jane {de Winter}}.
 % If the first letter in the forenames is a diacritic
 % enclose the diacritic in braces, e.g. \Name{{\'E}louise Smith}

 % Two authors with the same address
 % \midlauthor{\Name{Author Name1} \Email{abc@sample.edu}\and
 %  \Name{Author Name2} \Email{xyz@sample.edu}\\
 %  \addr Address}

 % Three or more authors with the same address:
 % \midlauthor{\Name{Author Name1} \Email{an1@sample.edu}\\
 %  \Name{Author Name2} \Email{an2@sample.edu}\\
 %  \Name{Author Name3} \Email{an3@sample.edu}\\
 %  \addr Address}


% Authors with different addresses:
% \midlauthor{\Name{Author Name1} \Email{abc@sample.edu}\\
% \addr Address 1
% \AND
% \Name{Author Name2} \Email{xyz@sample.edu}\\
% \addr Address 2
% }

%\footnotetext[1]{Contributed equally}

% More complicate cases, e.g. with dual affiliations and joint authorship
\midlauthor{\Name{Samra Irshad\nametag{$^{1}$}} \orcid{0000-0002-7148-6699} \Email{samra@khu.ac.kr}\\
\addr $^{1}$ School of Computing, Kyung Hee University, Gyeonggi-do 17104, South Korea \AND
\Name{Junho Kim\nametag{$^{2}$}} \Email{arkimjh@illinois.edu} \\
\addr $^{2}$ University of Illinois Urbana–Champaign, Champaign, IL, United States \AND
\Name{Seong Tae Kim\nametag{$^{1,*}$}}\footnotetext[1]{Corresponding author.} \Email{st.kim@khu.ac.kr}\\
\addr $^{1}$ School of Computing, Kyung Hee University, Gyeonggi-do 17104, South Korea
}

\begin{document}

\maketitle

\begin{abstract}
Automated chest CT radiology report generation has equipped clinicians with the ability to automatically describe clinical findings and abnormalities from CT scans. Given that patient prognosis relies heavily on these reports, generating an accurate CT report is critical. Advances in Multimodal Large Language Models (MLLMs) have enabled substantial improvements in CT-to-text report generation models, yet recent studies show that MLLMs are highly susceptible to adversarial perturbations. Beyond this known susceptibility, it remains unclear what triggers clinically dangerous attack scenarios during medical report generation. Understanding such threats is essential for developing robust medical AI systems~\textemdash~without a clear characterization of the threat, it is challenging to mitigate real-world risks. In this paper, we investigate how chest CT report generation models can be adversarially manipulated and what constitutes an adversarial CT report. We introduce \textbf{Clinically Risky Adversarial Report Generation (CRA-RG)}, a threat model that defines clinically realistic adversarial alterations to chest CT reports. To instantiate this threat model, we develop a targeted multimodal attack that perturbs both CT volumes and conditioning text prompts to induce clinically risky changes in reports. We show that our attack can successfully omit and fabricate clinically grounded high-risk CT chest findings (e.g., \textit{nodules} or \textit{lesions}). To the best of our knowledge, our study is the first empirical demonstration that state-of-the-art CT report generation models can be deceived into producing harmful clinical decisions, potentially leading to missed diagnoses or unnecessary biopsies. We evaluate our attack on two state-of-the-art CT report generation models using the publicly available chest 3D CT RadGenome dataset.
\end{abstract}

\begin{keywords}
Multimodal Large Language Models, CT Report Generation, Adversarial Attack.
\end{keywords}

\section{Introduction}
Examining chest scans for abnormalities across thoracic organs and then articulating the observed findings in a detailed written report are core responsibilities of radiologists. Chest radiology reports typically provide structured, organ-based descriptions of both normal anatomy and pathological findings. Given that the interpretation of chest radiographs required to write radiology reports is time-consuming and depends on specialized expertise, there has been growing interest in developing automated systems to assist with chest radiology reporting \cite{radiology_rep, Chen2020, Li2023, zhang2025semantic}. Automated models for radiology report generation are built on recent advances in multimodal learning, driven mainly by Multimodal Large Language Models (MLLMs) \cite{li2023blip, hu2024bliva, jian-etal-2024-large}. While prior studies have predominantly focused on chest X-ray report generation, chest CT scans contain much finer anatomical detail and impose a higher interpretive burden on radiologists. As a result, the development of radiology report generation models for chest CT interpretation has only recently begun to receive attention \cite{chen2025large, Hamamci2024}.
\begin{figure*}[h!]
\centering
\includegraphics[width=0.8\linewidth]{Figures/fig1.png}
\caption{
Adversarially manipulated chest CT report: CT volume (left) is shown with the ground-truth CT report (middle) illustrating organ-wise findings, and the corresponding adversarial report (right). Only key phrases are displayed for brevity.}
\label{figg:fig1}
\end{figure*}

Despite these successes, it is well established that vision models are vulnerable to adversarial attacks \cite{goodfellow2015explaining}. Although imperceptible, these adversarial attacks can profoundly distort model outputs, making them particularly dangerous in safety-critical domains \cite{Nguyen2024}. Beyond vision models, recent studies have raised new AI safety concerns by demonstrating that even LLMs are vulnerable, with adversarial prompts capable of bypassing built-in guardrails and manipulating models to generate harmful or factually incorrect responses \cite{Han2024, Zou2023, Andriushchenko2024}. Because MLLMs rely on LLMs as their language decoder and additionally incorporate visual inputs, the safety weaknesses of LLMs naturally extend to the multimodal setting. As a result, adversarial prompts introduced through either text or images present a new and significant threat to MLLMs \cite{aafaq2021controlled, zhao2023evaluating, Dong2023, Shayegani2023JailbreakIP}. This poses a significant risk to medical AI, particularly multimodal radiology report generation systems that rely on both image and language inputs and may produce clinically dangerous errors when exposed to adversarial prompts or perturbed images. Understanding these attack vectors is therefore essential for developing robust medical AI systems. Motivated by these concerns, we pose the following question:\\
 
\textbf{\textit{Can chest CT radiology report generation models be adversarially attacked, and, more importantly, what does it mean for a radiology report itself to be considered `attacked'?}}\\

In this paper, we introduce \textbf{Clinically Risky Adversarial Report Generation (CRA-RG)}, a new adversarial threat model that defines how MLLM-based chest CT-to-text report generation models can be attacked at test time to conceal or fabricate critical chest findings. We present an example of an adversarial CT report in \figureref{figg:fig1}, illustrating the manipulation of critical chest features. To realize this threat model, we develop a multimodal targeted adversarial attack that leverages learnable visual and textual perturbations and injects them into the model's inputs. Specifically, we apply voxel-level adversarial perturbations to both the full chest CT volume and the anatomy of interest (\textit{e.g.,} lung parenchyma or breast region), as well as perturbations to the text prompt. By jointly attacking the visual and textual representations, our method generates adversarial chest CT reports that remain clinically plausible yet omit or insert high-risk chest CT findings. Our proposed attack framework is shown in \figureref{figg:fig2}.
\begin{figure*}[h!]
\centering
\includegraphics[width=0.99\linewidth]{Figures/fig2.png}
\caption{
Our proposed framework for Clinically-Risky adversarial CT report generation. Given a 3D chest CT volume and text input at test time, we inject learnable targeted multimodal perturbations $\delta_{ct}$ and $\delta_{t}$ into CT volume and text prompt embeddings, respectively, to induce clinically risky report changes.
}
\label{figg:fig2}
\end{figure*}
\section{Related Work}
\subsection{Multimodal Large Language Models (MLLMs)}
Typically, MLLMs integrate a visual encoder with a pretrained Large Language Model (LLM) decoder via a connector to learn unified vision–language representations \cite{liu2023visual, li2023blip, alayrac2022flamingo}. The visual encoder extracts image embeddings, the connector transforms these embeddings into the language module's latent space, and finally, the language decoder generates the textual response conditioned on multimodal prompts. These representations, in turn, provide the foundation for effective cross-modal reasoning \cite{Touvron2023, Jiang2023, OpenAI2023}. With access to large-scale text corpora organized in natural-language instruction formats and equipped with large parameter-sized models, LLMs acquire strong instruction-following capabilities. When serving as the language decoder in MLLMs, these capabilities enable the model to reason over visual inputs and generate textual responses when guided by multimodal prompts. 
%Downstream adaptation of MLLMs often requires task-specific fine-tuning, which is a challenge in data-scarce domains. Parameter-efficient methods like LoRA and QLoRA mitigate this challenge by allowing only a small subset of parameters to be trained while preserving performance \cite{hu2022lora, dettmers2023qlora}. 
Leveraging these advances, recent studies have applied MLLMs to radiology report generation by fine-tuning them on paired radiology scan–report datasets \cite{Bannur2024MAIRA2GR, lee2025cxr}.
\subsection{Adversarial Attacks on Multimodal Large Language Models (MLLMs)}
Recently, several works have reported that MLLMs are vulnerable to adversarial attacks \cite{Qi2023, Carlini2023, Gong2023}. These attacks generally fall into two main categories, \textbf{jailbreak} and \textbf{evasion} attacks \cite{Qi2023, Cui2023}. \textbf{Jailbreak attacks} are primarily designed to break the safety alignment of MLLMs.
%or to probe model behaviour under malicious queries, a process often referred to as \textit{red-teaming}. Jailbreak attacks can cause models to bypass their safety mechanisms, thereby enabling them to generate harmful or undesired responses. 
On the other hand, \textbf{evasion attacks} aim to manipulate the visual input, the textual input, or both to alter the model's normal behaviour. Evasion attacks are typically designed to introduce targeted mispredictions or produce untargeted misleading outputs while keeping perturbations small \cite{Cui2023, zhao2023evaluating, Hanif2025}. Our work falls into the category of targeted evasion attacks. A few studies have recently examined the adversarial robustness of MLLMs for medical VQA tasks \cite{Clusmann2025, Hanif2025}.
However, the existing studies attack the MLLMs by assuming a generic target, \textit{e.g., no signs of disease} \cite{Hanif2025}. Such generic targets are insufficient for radiology report generation, which follows a strict organ-based structure. Radiology chest CT reports consist of organ-specific sections, each describing anatomy-specific findings \cite{Hamamci2024a}. Each section therefore describes only the abnormalities associated with its own organ. For example, lung findings and abdominal findings involve distinct pathologies and therefore rely on terminology unique to their respective anatomies. Consequently, a single generic target is insufficient for attacking radiology report generation models. To our knowledge, no prior work has investigated targeted, organ-aware adversarial attacks on CT report generation models. Our method addresses this gap by explicitly modeling and attacking organ-specific findings in CT reporting MLLMs.
\section{Proposed Methodology}
\subsection{Problem Formulation}
Consider a 3D chest CT scan $x_{\text{ct}}$ and a text prompt $x_{\text{t}}$ that instructs the MLLM to generate a text report. At test time, given $(x_{\text{ct}}, x_{\text{t}})$, the model autoregressively predicts the conditional probability of a report $y = (y_1, \dots, y_K)$, as expressed in \equationref{eq:autoregressive}.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{equation}
p_{\theta}(y \mid x_{\text{ct}}, x_{\text{t}}) = \prod_{k=1}^{K}p_{\theta}(y_{k} \mid x_{\text{ct}}, x_{\text{t}}, y_{<k}),
\label{eq:autoregressive}
\end{equation}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%$p_{\theta}(\hat{y} | x_{\text{ct}})$ , 
where $p_{\theta}$ denotes the CT-grounded radiology report generation model parameterized by $\theta$, $y_k$ is the $k$-th token in the report, $y_{<k} = (y_1, \dots, y_{k-1})$ denotes all previously generated tokens, and $K$ is the length of the report. Then, the task of adversarial chest CT report generation ${y}_{\text{adv}}$ can be formulated as below:
\begin{equation}
p_{\theta}\!\left( 
    y_{\text{adv}} \,\middle|\, 
    x_{\text{ct}} + \delta_{\text{ct}},\,
    x_{\text{t}} + \delta_{\text{t}}
\right)
=
\prod_{k=1}^{K}
p_{\theta}\!\left(
    y_{\text{adv},k} \,\middle|\,
    x_{\text{ct}} + \delta_{\text{ct}},\,
    x_{\text{t}} + \delta_{\text{t}},\,
    y_{\text{adv},<k}
\right).
\label{eq:adv_autoregressive}
\end{equation}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
In \equationref{eq:adv_autoregressive}, $\delta_{\text{ct}}$ represents the learnable visual perturbation applied to the 3D chest CT volume, whose magnitude $\|\delta_{\text{ct}}\|$ is bounded by the budget $\epsilon_{\text{ct}}$. The goal of the adversary is to keep the perturbation small ($\|\delta_{\text{ct}}\| \le \epsilon_{\text{ct}}$) so that the perturbed image $x_{\text{ct}}+\delta_{\text{ct}}$ remains visually indistinguishable from the clean image $x_{\text{ct}}$. Likewise, $\delta_{\text{t}}$ is the prompt perturbation, bounded by $\epsilon_{\text{t}}$ ($\|\delta_{\text{t}}\| \le \epsilon_{\text{t}}$).

To examine whether effective adversarial perturbations can be learned for multimodal inputs that mislead the MLLM into altering high-risk chest findings and subsequently generating a clinically dangerous report, we define a target text that the attacker aims to force the model to generate. This type of attack falls under the category of \textbf{targeted adversarial attack}, in which the adversary specifies in advance the exact output (typically known as \textbf{target}) they aim to obtain from the victim model. In targeted adversarial attack scenarios, the target is typically drawn from the model's normal output distribution but is deliberately selected because it represents an incorrect or dangerous outcome \cite{Sato2020DirtyRC}. Following this principle, we define clinically plausible target outputs that intentionally contradict the ground truth. These opposing targets~\textemdash~such as reporting an abnormality as absent or fabricating a nonexistent one~\textemdash~are chosen to induce harmful clinical decisions.
\subsection{Threat Model}
We assume a white-box threat model in which the adversary has complete knowledge about the victim CT-to-text generative model $p_{\theta}$. Under this setting, we focus on learning perturbations for targeted adversarial manipulation of the generated text report.
\subsubsection{Clinically-Risky Adversarial Report Generation} 
In our proposed attack, the adversary aims to deceive the MLLM into producing a predefined harmful target report. We sample the targets from the original reports to ensure that $y_{\text{target}} \in \mathcal{Y}$, where $\mathcal{Y}$ denotes the report sentences extracted from the dataset. Thus, the goal of our targeted attack is to drive the MLLM toward generating the predefined target report by minimizing the text modeling loss $\mathcal{L}$:
\begin{equation}
\min_{\delta_{\text{ct}},\, \delta_{t}}
\; \mathcal{L}\!\left(
y_{\text{target}},\;
p_{\theta}\!\left( y_{adv} \,\mid\, x_{\text{ct}} + \delta_{\text{ct}},\; x_t + \delta_{t}\right)
\right)
\quad
\text{s.t.}
\quad
\|\delta_{\text{ct}}\| \le \epsilon_{\text{ct}},\;
\|\delta_{t}\| \le \epsilon_{t}.
\end{equation}
Each chest CT report describes the thoracic organs and indicates whether abnormalities are present. From these descriptions, we derive candidate negative $\mathcal{Y}_{\text{neg}}$ and positive targets $\mathcal{Y}_{\text{pos}}$. For instance, a negative lung target might be: \textit{No nodule or infiltrative lesion is observed in the lung parenchyma}. Driven by the possible ways in which a chest CT report can be adversarially manipulated, we define the following adversarial goals:
\paragraph{(a) Suppressing high-risk chest abnormalities:} Here, the adversary aims to increase the likelihood of clinical misdiagnosis by omitting specific targeted abnormalities. We sample the target text from the set of negative sentences in the ground-truth reports, which indicate the absence of any critical abnormality ($y_{\text{target}} \in \mathcal{Y}_{\text{neg}}$).
\paragraph{(b) Fabricating high-risk chest abnormalities:} Here, the adversary aims to potentially trigger unnecessary biopsies or follow-up examinations by inserting particular high-risk abnormalities. In this case, we sample the target text from the set of positive sentences in the ground-truth reports, which indicate the presence of critical abnormality ($y_{\text{target}} \in \mathcal{Y}_{\text{pos}}$).
\subsubsection{Multimodal Adversarial Optimization}
To fool the MLLM into generating an adversarial text report from multimodal input, we begin by randomly initializing learnable visual perturbations $\delta_{\text{ct}} \in \mathbb{R}^{C \times H \times W \times D}$ and text prompt perturbations $\delta_{t} \in \mathbb{R}^{L \times d}$. Here, $C$, $H$, $W$, and $D$ denote the channel, height, width, and depth dimensions of the 3D CT volume, and $L$ and $d$ denote the length and hidden dimension of the text embedding. The perturbation $\delta_{\text{ct}}$ is added to the clean 3D CT scan $x_{ct} \in \mathbb{R}^{C \times H \times W \times D}$ and $\delta_{t}$ to the text-prompt embedding $E_{t} \in \mathbb{R}^{L \times d}$ derived from the text prompt $x_t$. We also localize the perturbation to a specific anatomical region by using segmentation masks to isolate that region within the CT volume. This enables our proposed adversary to steer the adversarial optimization toward the loss associated with the corresponding anatomical subsection of the report. More specifically, let $M^{(r)} \in \{0,1\}^{H \times W \times D}$ be the binary segmentation mask for anatomical region $r$ (\textit{e.g.,} lung), and let $\delta_{\text{ct}}$ be the global image perturbation. The perturbed input for region $r$ is then $x_{\text{adv}}^{(r)} = M^{(r)} \odot (x_{ct}+\delta_{\text{ct}})$, i.e., the perturbed volume restricted to that region. After the perturbed multimodal input is passed through the MLLM, we obtain the text report loss $\mathcal{L}$ and subsequently its gradients with respect to the image and text-embedding perturbations, $\nabla_{\delta_{ct}}$ and $\nabla_{\delta_{t}}$, respectively. The image and text-prompt perturbations are then updated via gradient descent, followed by projection onto the allowable perturbation range. Figure 7 in Appendix E illustrates the evolution of adversarially optimized CT images across different optimization steps.

We update perturbations $\delta_{\text{ct}} \leftarrow 
\operatorname{clip}(
\delta_{\text{ct}} - \alpha_{ct}\,\operatorname{sign}(\nabla_{\delta_{ct}}))$ and $\delta_{\text{t}} \leftarrow 
\operatorname{clip}(
\delta_{\text{t}} - \alpha_{t}\,\operatorname{sign}(\nabla_{\delta_{t}}))$ using PGD \cite{Madry2017}. $\alpha_{ct}$ and $\alpha_{t}$ denote the step sizes for image and text embedding perturbation updates. To keep perturbations minimal while adversarially effective, we use PGD with adaptive early stopping \cite{Li2025} where optimization stops once the generated report is sufficiently aligned with the target ($\geq\tau$ similarity by a text similarity metric) \cite{Zhang2020}. In our experiments, we use 1/255, 0.01, 16/255, 0.1, and 0.85 for $\alpha_{ct}$, $\alpha_{t}$, $\epsilon_{ct}$, $\epsilon_{t}$, and $\tau$, respectively. We set 100 as the maximum number of optimization steps.
\section{Experimental Settings}
\subsection{Victim CT-to-text Report Generation Model}
We evaluate our attack on two state-of-the-art 3D CT-to-text report generation models, Reg2RG \cite{chen2025large} and CT-CHAT \cite{Hamamci2024}. Reg2RG \cite{chen2025large} is a multimodal model composed of a 3D Vision Transformer (3D-ViT) \cite{Wu2025} for volumetric CT encoding and a LLaMA2-7B decoder \cite{touvron2023llama}. It incorporates anatomically grounded region tokens to improve alignment between visual features and organ-specific descriptions, enabling the generation of structured chest CT reports. CT-CHAT \cite{Hamamci2024} combines a 3D CT-CLIP encoder with a LLaMA-3.1-8B decoder via a multimodal projector, leveraging CT-CLIP’s contrastively learned 3D representations. All experiments use the official pretrained weights and inference pipelines released by the authors.\footnote{\url{https://github.com/zhi-xuan-chen/Reg2RG}}\textsuperscript{,}\footnote{\url{https://github.com/ibrahimethemhamamci/CT-CHAT}}
\begin{table}[t]
    \caption{Evaluation of adversarial attack on targeted organ and entire report. We evaluate our attack for both Targeted Suppression (Omission) and Targeted Fabrication (Insertion) of clinical findings. A higher ROUGE-L score indicates stronger adversarial effectiveness.}
     \vspace{0.5em}  %
    \label{tab:tab1}
    \centering
    \small
    \resizebox{1.0\linewidth}{!}{
    \begin{tabular}{lcccc}
        \Xhline{2\arrayrulewidth}
        \multirow{2}{*}{\bfseries Method} 
        & \multicolumn{2}{c}{\bfseries Organ-level} 
        & \multicolumn{2}{c}{\bfseries Report-level} \\ \cmidrule(lr){2-3} \cmidrule(lr){4-5}
        & \bfseries Suppression & \bfseries Fabrication & \bfseries Suppression & \bfseries Fabrication \\
        \midrule
      Reg2RG \cite{chen2025large} (w/o attack)      &  0.439 &           0.198 &          0.370 &          0.280   \\
       CRA-RG (Reg2RG)    & \textbf{0.937}  &  \textbf{0.888} & \textbf{0.410}&  \textbf{0.294} \\
        \cdashline{1-5}\noalign{\vskip 0.2ex}
         CT-CHAT \cite{Hamamci2024} (w/o attack)      &  0.445 &   0.513         &   0.281        &      0.276        \\
        CRA-RG (CT-CHAT)    & \textbf{1.000}   & \textbf{1.000}  & \textbf{0.761} &  \textbf{0.508} \\
        % add more rows as needed
        \Xhline{2\arrayrulewidth}
    \end{tabular}
}
\end{table}
\subsection{Implementation and Benchmark Details}
We evaluate our attack on the RadGenome-Chest dataset \cite{Zhang2025}, using the 1,500 CT–report pairs from its validation split. All experiments are conducted on a single NVIDIA RTX A6000 (48 GB). As shown in \figureref{figg:fig2}, both the volume encoder and language decoder remain frozen during attack optimization. For demonstration, we focus on adversarial manipulation of critical findings in the lung and breast. 
\subsection{Evaluation Setup for Adversarial CT Reports}
We evaluate two targeted attack scenarios to determine whether the adversary can fool the MLLM to omit true findings and fabricate false ones. For each input CT scan, a target is randomly selected from a set of predefined clinical statements. Although the attack focuses on specific anatomies (the lung and breast), perturbations are applied to the entire CT volume and may affect other findings. To analyze these effects, we evaluate adversarial outcomes at both the organ and report levels. Organ-level evaluation measures how closely the generated organ-specific captions match the predefined targets. Report-level evaluation compares the full adversarial report to two idealized adversarial versions~\textemdash~one with all critical findings suppressed and one with all findings fabricated. For reference, we also compute the corresponding similarities for benign predictions to establish a baseline indicating how unlikely such manipulations are in the absence of an attack.
\section{Experimental Results}
\subsection{Attack Success Rate of Targeted CT Report Manipulation} 
To assess whether the adversarial perturbations applied to multimodal input can lead to successful fooling of the MLLM in manipulating the critical findings, we compute ROUGE-L \cite{lin2004rouge} similarity between the adversarially generated reports and predefined target texts as summarized in \tableref{tab:tab1}. Both Reg2RG and CT-CHAT show low organ- and report-level similarity with the adversarial target texts in the absence of an attack, indicating that the target descriptions rarely occur in normal report generation. In contrast, our attack substantially increases ROUGE-L scores for both targeted suppression and fabrication, demonstrating effective manipulation of localized clinical findings across two state-of-the-art CT report generation models. We observe consistently higher success at the organ level than at the report level, suggesting that localized findings are easier to manipulate due to their direct correspondence with visual features, whereas full reports require maintaining global clinical and linguistic consistency. While steering an entire report toward a fixed adversarial target remains challenging, these results highlight a critical vulnerability whereby clinically salient findings can be selectively inserted or removed without fully compromising global report coherence. Qualitative examples of ground-truth and adversarial CT reports are provided in \figureref{figg:fig4} and \figureref{figg:fig5} in Appendix A. We analyze attack robustness under varying perturbation budgets in Table 7 (Appendix B), present qualitative comparisons between clean and adversarial CT images with difference maps in Figure 6 (Appendix C), and report quantitative quality metrics for adversarial CT images in Table 8 (Appendix D).
\begin{table}[t]
    \caption{NLG-based Evaluation of Stealthiness of Adversarial Reports.}
     %\vspace{0.5em}  %
    \label{tab:tab2}
    \centering
    \small
    \resizebox{0.8\linewidth}{!}{
    \begin{tabular}{lcccccc}
    \Xhline{2\arrayrulewidth}
    \bfseries Method & \bfseries BL-1 & \bfseries BL-2 & \bfseries BL-3 & \bfseries BL-4& \bfseries MTR & \bfseries RG-L\\
    \midrule
    Reg2RG \cite{chen2025large} (w/o attack)  & 0.473 &0.365 &0.296 &0.249 &0.441 &0.367 \\
     CRA-RG (Reg2RG) & 0.413 & 0.302& 0.229& 0.179 & 0.387 & 0.306 \\
     \cdashline{1-7}\noalign{\vskip 0.2ex}
        CT-CHAT \cite{Hamamci2024} (w/o attack)   &  0.366& 0.260&0.195 & 0.154& 0.275&0.199 \\
        CRA-RG (CT-CHAT) & 0.252 &0.197 & 0.167&0.149  & 0.319 & 0.188 \\
    \Xhline{2\arrayrulewidth}
\end{tabular}
}
\end{table}
\subsection{Stealthiness of Adversarial CT Reports} 
To assess whether adversarial CT reports generated by attacking the MLLM remain stealthy and inconspicuous, we quantify the extent to which they preserve the structural characteristics of reports produced from clean inputs. Specifically, we compute Natural Language Generation (NLG) metrics between adversarial and ground-truth reports and compare these scores with those obtained by evaluating benign reports\footnote{Benign CT reports denote reports generated from clean, unattacked inputs.} against their corresponding ground-truth texts. In this setting, NLG metrics serve as proxies for structural similarity because they indicate how closely the adversarial reports retain the overall textual organization of the original reports. We compute BLEU-$n$ \cite{bleu} ($n$-gram overlap), ROUGE-L \cite{lin2004rouge} (longest-common-subsequence similarity), and METEOR \cite{meteor} (semantically informed matching), as summarized in \tableref{tab:tab2}. As \tableref{tab:tab2} shows, adversarial reports exhibit modest reductions in NLG scores relative to benign baselines; however, the decline is not substantial given that these reports are generated from adversarially perturbed CT scans and prompt embeddings. We observe a relatively larger drop in unigram overlap (BLEU-1) for CT-CHAT under attack compared to Reg2RG. We hypothesize that this is largely due to CT-CHAT's higher fooling rate (as shown in \tableref{tab:tab1}), which allows adversarial perturbations to more strongly manipulate the reports. Despite successfully suppressing critical clinical findings, the adversarial CT reports largely preserve the structural form of the original CT reports and therefore remain stealthy.
\begin{table}[t]
    \caption{Transferability of adversarial CT perturbations across language decoders: Adversarial CT volumes generated with Reg2RG (3D ViT + LLaMA2-7B) are evaluated for adversarial report generation effectiveness on a Mistral-7B decoder. We report average attack success rate on organ level and report level.}
     %\vspace{0.5em}  %
    \label{tab:tab3}
    \centering
    \small
    %\scriptsize
    %\resizebox{1.0\linewidth}{!}{
    \begin{tabular}{lcc}
        \Xhline{2\arrayrulewidth}
       \bfseries Method 
        & \bfseries Organ-level
        & \bfseries Report-level\\ 
        \midrule
        Reg2RG (LLaMA2-7B) \cite{chen2025large} (w/o attack)      & 0.319 &  0.325   \\
         Reg2RG (Mistral-7B) \cite{chen2025large} (w/o attack)      &  0.263 &   0.404   \\ \cdashline{1-3}\noalign{\vskip 0.2ex}
        CRA-RG (LLaMA2-7B)    & 0.913   & 0.352 \\
        CRA-RG (Mistral-7B)    & 0.795   & 0.395 \\
        \cdashline{1-3}\noalign{\vskip 0.2ex}
        CRA-RG (LLaMA2-7B $\to$ Mistral-7B) & 0.594   & 0.284 \\
        CRA-RG (Mistral-7B $\to$ LLaMA2-7B) & 0.387   & 0.334 \\
        % add more rows as needed
        \Xhline{2\arrayrulewidth}
    \end{tabular}
%}
\end{table}
\subsection{Transferability of Adversarial CT Perturbations}
To analyze whether our induced CT perturbations persist across different MLLMs, we conduct black-box transferability experiments. We evaluate black-box transferability by applying adversarial CT volumes optimized against the Reg2RG model with a LLaMA2-7B decoder \cite{chen2025large} to a target model employing the same 3D-ViT encoder but a different language decoder (Mistral-7B).\footnote{Due to backbone-specific volumetric preprocessing, we assess black-box transfer by fixing the 3D-ViT encoder and varying the language decoder.} During attack optimization, gradients flow through the entire pipeline, but the perturbation ultimately manipulates the visual and textual representations, not the visual encoder or language decoder parameters. As shown in \tableref{tab:tab3}, perturbations optimized against Reg2RG (LLaMA2-7B) demonstrate significant transferability to a Mistral-7B target, with the organ-level attack success rate (ASR) reaching 0.594, which is nearly double the clean LLaMA2-7B baseline (0.319). In contrast, adversarial transfer from Mistral-7B to LLaMA2-7B is weaker at the organ level (0.387), and report-level transferability in this direction (0.334) falls below the clean Mistral-7B baseline (0.404). This suggests that while the attack can effectively degrade the correctness of generated reports under black-box transfer, it is less reliable at inducing specific targeted clinical descriptions. While white-box efficacy remains highest (0.913), the success of these cross-decoder attacks suggests that the vulnerability is not specific to a particular decoder architecture but rather arises from shared multimodal alignment mechanisms and common generation dynamics across LLM decoders. These findings underscore the practical risk of our threat model: an attacker possessing white-box knowledge of a commonly used medical vision encoder can effectively compromise a black-box system even if the specific language model remains proprietary or unknown.
\begin{table}[t]
\caption{Organ Recognition Performance Under Multimodal Adversarial Attack: We examine the ability of MLLM to identify the organs in adversarial CT scans. A decrease in Recall and F1-score indicates the strength of our attack in deteriorating the MLLM's organ detection ability.}
 \vspace{0.5em}  %
\label{tab:tab4}
\centering
\small
\resizebox{0.7\linewidth}{!}{
\begin{tabular}{lcccccc}
\Xhline{2\arrayrulewidth}
\multirow{2}{*}{\bfseries Organ} & \multicolumn{2}{c}{\bfseries Benign Reports} & \multicolumn{2}{c}{\bfseries Adversarial Reports} & \multicolumn{2}{c}{\bfseries Difference} \\
\cmidrule(lr){2-3} \cmidrule(lr){4-5} \cmidrule(lr){6-7}
 & \bfseries Recall & \bfseries F1 & \bfseries Recall & \bfseries F1 & \bfseries $\Delta$ Recall & \bfseries $\Delta$ F1\\ \hline
Abdomen            & 0.997 & 0.997 & 0.649 & 0.733 & 0.348 &0.264 \\
Bone               & 0.999 & 0.999 & 0.652 & 0.785 & 0.347&0.214 \\
Breast             & 0.967 & 0.967 & 0.908 & 0.863 & 0.059 & 0.104 \\
Esophagus          & 0.999 & 0.999 & 0.941 & 0.965 & 0.058 & 0.034 \\
Heart              & 0.995 & 0.995 & 0.938 & 0.947 & 0.057&0.048 \\
Lung               & 0.807 & 0.807 & 0.008 & 0.009 & \textbf{0.799}&\textbf{0.798} \\
Mediastinum        & 0.995 & 0.993 & 0.946 & 0.850 & 0.049&0.143 \\
Pleura             & 0.807 & 0.807 & 0.193 & 0.319 & 0.614&0.488 \\
Thyroid            & 0.962 & 0.975 & 0.894 & 0.785 & 0.068&0.190 \\
Trachea \& Bronchi & 0.973 & 0.977 & 0.930 & 0.937 & 0.043&0.040 \\
\cdashline{1-7}\noalign{\vskip 0.2ex}
Average            & 0.950 & 0.952 & 0.706 & 0.719 &0.244&0.233\\
\Xhline{2\arrayrulewidth}
\end{tabular}
}
\end{table}
\subsection{Thoracic Organs Recognition after Adversarial Attack on MLLM}
The baseline MLLM we attack can both identify thoracic organs and generate full CT reports \cite{chen2025large}. Through prompting, the model is instructed to both recognize anatomical structures and produce a comprehensive description of the scan. To evaluate how our adversarial perturbations affect the MLLM's ability to detect thoracic organs, we assess organ-recognition performance under clean and adversarial conditions using Recall and F1 scores. The organ recognition performance is shown in \tableref{tab:tab4}. The baseline model recognizes most organs with near-perfect Recall and F1 scores. With adversarially manipulated multimodal inputs, recognition performance decreases across nearly all organs, indicating that adversarial perturbations disrupt the model's grounding ability. The most severe degradation occurs for the lung, where Recall and F1 drop sharply from 0.807 to nearly zero, consistent with our attack targeting lung-related findings. In contrast, regions less directly tied to the adversarial objective (\textit{e.g.,} Heart, Esophagus, Trachea \& Bronchi) retain relatively high performance, though still lower than the benign baseline. Overall, these results indicate that adversarial perturbations not only manipulate targeted clinical findings but also impair the MLLM's broader ability to correctly identify anatomical thoracic regions.
\begin{table}[t]
\caption{Effect of Adversarial Attack on Detection of Chest Abnormalities: This table reports the degradation in detection of chest findings from CT reports under our proposed adversarial threat. The decrease in Recall and F1 indicates the effectiveness of our attack at concealing critical abnormalities in the generated reports.}
 \vspace{0.5em}  %
\label{tab:tab5}
\centering
\small
    \resizebox{0.9\linewidth}{!}{
    \begin{tabular}{lcccccc}
    \Xhline{2\arrayrulewidth}
\multirow{2}{*}{\bfseries CT findings} & \multicolumn{2}{c}{\bfseries Benign Reports} & \multicolumn{2}{c}{\bfseries Adversarial Reports} & \multicolumn{2}{c}{\bfseries Difference}\\ \cmidrule(lr){2-3} \cmidrule(lr){4-5} \cmidrule(lr){6-7}
                       &  \bfseries Recall & \bfseries F1            &  \bfseries Recall & \bfseries F1     & \bfseries $\Delta$ Recall & \bfseries $\Delta$ F1       \\ \hline
Arterial wall calcification    & 0.62  & 0.62 & 0.31  & 0.43   &0.31&0.19      \\
Cardiomegaly                  & 0.12  & 0.16             & 0.0  & 0.0    &0.12&
0.16         \\
Coronary artery wall calcification   & 0.61  & 0.55   & 0.18  & 0.27 &0.43 &0.28  \\
Emphysema   & 0.22  & 0.26     & 0.01  & 0.03  & 0.21 &0.23    \\
Atelectasis                  & 0.65  & 0.39  & 0.0&0.0  & 0.65 &0.39        \\
Lung opacity                 & 0.24  & 0.38       & 0.03  & 0.06      &0.21&0.32       \\
Pulmonary fibrotic sequela      & 0.11  & 0.17   & 0.0 & 0.0  &0.11&0.17       \\   
Pleural effusion        & 0.38  & 0.50    & 0.03  & 0.05   &0.35 &0.45     \\  
Consolidation       & 0.24  & 0.31    &  0.0 &0.0   & 0.24 &0.31      \\  
\cdashline{1-7}\noalign{\vskip 0.2ex}
Average  & 0.35  & 0.37           & 0.06  & 0.09 & 0.29 &0.28  \\
\Xhline{2\arrayrulewidth}
\end{tabular}
}
\end{table}
\subsection{Impact of Adversarial Attack on Detection of Clinical Findings} 
To evaluate how our attack affects the detection of clinically meaningful findings that are extracted from generated chest CT reports, we employ Clinical Efficacy (CE) metrics \cite{Chen2020}. In contrast to conventional NLG metrics that assess textual similarity, CE metrics measure diagnostic fidelity by determining whether key clinical abnormalities are correctly captured in the generated text. Because our adversary aims to attack the MLLM to alter critical diagnostic content, CE metrics provide a direct assessment of the attack's clinical consequences. We use the RadBERT text classifier \cite{yan2022radbert} to automatically extract abnormalities from reports, and quantify performance using Recall and F1 score. \tableref{tab:tab5} summarizes the results. For each abnormality, we present Recall and F1 scores derived from benign and adversarial reports, measured against the abnormalities mentioned in the ground-truth report. We found that, in adversarial reports, the detection of critical chest findings declines sharply. Many abnormalities, including cardiomegaly, atelectasis, consolidation, and fibrotic sequela, become completely undetectable, indicating that the adversarial attack substantially disrupts the clinical information encoded in the generated reports and thus reduces their diagnostic reliability.
\subsection{Ablation Study on the Impact of Attacking Individual Modalities}
We perform an ablation analysis on the Reg2RG baseline model \cite{chen2025large} to evaluate the relative effectiveness of visual-only, text-only, and multimodal input perturbations in adversarial report generation. As shown in Table~\ref{tab:tab6}, visual perturbations achieve high anatomical organ-level success (0.922), while text-only perturbations are less effective, likely because the image remains unchanged. The strongest manipulation (0.937) occurs when both modalities are perturbed. Notably, fabrication is more difficult than suppression, suggesting that degrading visual evidence makes it harder for the model to justify the insertion of nonexistent findings.
\begin{table}[t]
    \caption{Ablation analysis of modality-specific and multimodal adversarial attacks for targeted suppression and fabrication at the organ and report levels. Higher ROUGE-L scores denote stronger attack effectiveness.}
     \vspace{0.5em}  %
    \label{tab:tab6}
    \centering
    \small
    \resizebox{1.0\linewidth}{!}{
    \begin{tabular}{lcccc}
        \Xhline{2\arrayrulewidth}
        \multirow{2}{*}{\bfseries Method} 
        & \multicolumn{2}{c}{\bfseries Organ-level} 
        & \multicolumn{2}{c}{\bfseries Report-level} \\ \cmidrule(lr){2-3} \cmidrule(lr){4-5}
        & \bfseries Suppression & \bfseries Fabrication & \bfseries Suppression & \bfseries Fabrication \\
        \midrule
        Baseline \cite{chen2025large} (w/o attack)      &  0.439 &           0.198 &          0.370 &          0.280   \\
        CRA-RG (visual)            &  0.922 &           0.887 &          \textbf{0.456}&  0.272  \\
        CRA-RG (text)              &  0.518 &           0.719 &          0.346 &          0.278     \\
        CRA-RG (visual + text)    &  \textbf{0.937}  &  \textbf{0.888} & 0.410&   \textbf{0.294} \\
        % add more rows as needed
        \Xhline{2\arrayrulewidth}
    \end{tabular}
}
\end{table}
\subsection{Analysis of Multimodal Embedding Space under Adversarial Attack}
To understand how multimodal adversarial inputs affect the MLLM's embedding space, we compute cosine similarity between ground-truth embeddings and embeddings from non-adversarial versus adversarial reports. \figureref{figg:fig3} shows the cosine similarity matrices between ground-truth embeddings and embeddings from non-adversarial (left) versus adversarial reports (right). The non-adversarial similarity matrix exhibits broad consistency with the ground-truth embeddings. In contrast, the adversarial matrix exhibits pronounced horizontal bands, indicating that ground-truth tokens map strongly to a narrow subset of adversarial embeddings. This pattern reflects a distortion in the embedding space induced by the adversarial perturbations, demonstrating that the attack not only alters the textual output but also reshapes internal semantic representations.
\begin{figure*}[t!]
\centering
\includegraphics[width=0.9\linewidth]{Figures/fig3.png}
\caption{
Cosine similarity matrices between ground-truth token embeddings and embeddings from non-adversarial (left) and adversarial (right) reports.
}
\label{figg:fig3}
\end{figure*}
\section{Discussion and Limitations}
This work presents the first systematic study of adversarial attacks on large CT report generation MLLMs. By perturbing multimodal inputs, we generate clinically risky adversarial reports that include targeted suppression and fabrication of findings. While these are the focus of this study, other, more subtle adversarial forms may exist. Our threat model assumes a white-box adversary, which is realistic given the public availability of widely used medical vision encoders. Cross-decoder transferability results suggest that vulnerabilities lie in shared visual representations and multimodal alignment, meaning attacks may persist even with proprietary language models. Finally, developing defenses against such adversarial manipulation remains a key avenue for future work.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Conclusion}
In this work, we presented Clinically Risky Adversarial Report Generation (CRA-RG), a new threat model that characterizes how chest CT radiology report generation systems can be adversarially manipulated to produce clinically dangerous outputs. Our threat model, CRA-RG, focuses on clinically meaningful manipulations, including the omission and fabrication of critical findings such as nodules, consolidations, and pleural abnormalities. To instantiate this threat model, we introduced a multimodal targeted adversarial attack that jointly perturbs CT volumes and conditioning text embeddings, enabling fine-grained control over specific anatomical regions. Our experiments on the RadGenome 3D chest CT dataset demonstrated that state-of-the-art multimodal report generation models are highly susceptible to adversarial perturbations. These results provide the first empirical evidence that modern chest CT report generation systems can be driven to produce harmful clinical recommendations~\textemdash~including missing high-risk findings or fabricating nonexistent abnormalities~\textemdash~raising critical safety concerns for real-world deployment.
\clearpage  % Acknowledgements, references, and appendix do not count toward the page limit (if any)
% Acknowledgments---Will not appear in anonymized version
\midlacknowledgments{This work was supported by the IITP (Institute of Information \& Communications Technology Planning \& Evaluation)-ITRC
(Information Technology Research Center) grant funded by the Korea government (Ministry of Science and ICT) (IITP-2026-RS-2023-00258649, 50\%), and by the National Research Foundation of Korea (NRF) grant funded by the Korea government (MSIT) (No. RS-2024-00334321, 50\%). We also thank Minkuk Kim and Youngseob Won for helpful discussions in the early stages of this work.}


\bibliography{midl26_293}


\newpage
\appendix

\section{Example of Real and Adversarial CT Report}
\begin{figure*}[h!]
\centering
\includegraphics[width=1.0\linewidth]{Figures/fig4.png}
\caption{
We present an example of a real CT report (left) and an adversarially generated CT report (right) using our proposed multimodal perturbations. Note that the CT report generation model has been successfully fooled into omitting the majority of the significant clinical findings in the chest CT (e.g., consolidation in the lungs and lymph nodes in the mediastinum are suppressed in the adversarial CT report).
%An example of an adversarially attacked chest CT report. We show the CT volume alongside the original organ-wise key findings from the ground-truth report and the corresponding adversarial report. For brevity, only the key phrases for each finding are shown.
}
% \vspace{-3mm}
\label{figg:fig4}
\end{figure*}

\begin{figure*}[h!]
\centering
\includegraphics[width=1.0\linewidth]{Figures/fig5.png}
\caption{
Another example of a clean (left) and adversarial CT report (right) generated by attacking the MLLM through adversarial multimodal input. The abnormalities in the bone and lung have been successfully omitted.
%An example of an adversarially attacked chest CT report. We show the CT volume alongside the original organ-wise key findings from the ground-truth report and the corresponding adversarial report. For brevity, only the key phrases for each finding are shown.
}
% \vspace{-3mm}
\label{figg:fig5}
\end{figure*}
\clearpage
\newpage


\section{Ablative Study on Perturbation Budget}
To study the impact of the CT perturbation budget ($\epsilon_{\text{ct}}$) on attack success, we perform an ablation experiment in which we vary $\epsilon_{\text{ct}}$ from 16/255 to 8/255 and 4/255 on CT volumes that are successfully attacked at $\epsilon_{\text{ct}} = 16/255$. We evaluate performance by reporting the attack failure rate.

Table~\ref{tab:tab7} analyzes how the CT perturbation budget ($\epsilon_{\text{ct}}$) affects attack success. Reducing the perturbation budget to 8/255 results in a small increase in attack failures (1\%), indicating that the attack remains largely effective even under tighter visual constraints. When $\epsilon_{\text{ct}}$ is further reduced to 4/255, the attack failure rate increases to 8\%, suggesting the emergence of a minimum effective perturbation threshold below which adversarial optimization becomes less reliable. Overall, these results highlight a trade-off between attack success and perturbation strength and demonstrate that the proposed attack remains effective at relatively low $\epsilon_{\text{ct}}$ values.
\begin{table}[H]
    \caption{Attack Robustness Under Varying CT Perturbation Budgets}
     %\vspace{0.5em}  %
    \label{tab:tab7}
    \centering
    %\small
    %\resizebox{1.0\linewidth}{!}{
    \begin{tabular}{lc}
        \Xhline{2\arrayrulewidth}
       \bfseries Perturbation Budget $\epsilon_{\text{ct}}$ 
        & \bfseries Attack Failure Rate\\ 
        \midrule
        16/255      &  0.0    \\
         8/255      &  0.01    \\
        4/255    & 0.08   \\
        % add more rows as needed
        \Xhline{2\arrayrulewidth}
    \end{tabular}
%}
\end{table}

\clearpage
\newpage

\section{Visual Comparison of Clean and Adversarial CT Scans}
We present examples of clean CT images alongside their adversarial counterparts and corresponding difference maps in Figure \ref{figg:fig6}. As illustrated, the adversarial perturbations are visually imperceptible and do not introduce noticeable artifacts, while the difference maps reveal subtle, spatially distributed changes that are sufficient to induce significant manipulation of the generated reports.
\begin{figure*}[h!]
\centering
\includegraphics[width=1.0\linewidth]{Figures/fig6.png}
\caption{
Comparison of the Visual Quality of Original CT images, Adversarial images, and their difference maps. 
}
% \vspace{-3mm}
\label{figg:fig6}
\end{figure*}

\clearpage
\newpage

\section{Quantifying the Stealth of Adversarial CT Perturbations}
To assess the deviation of adversarial CT images from their original counterparts, we compute perceptual similarity metrics, including SSIM \cite{wang2004ssim}, as well as the mean and maximum voxel-wise intensity differences between clean and adversarial volumes measured in Hounsfield Units (HU). These metrics jointly quantify structural similarity and perturbation magnitude, providing insight into the visual stealth of the proposed attack. As shown in Table \ref{tab:tab8}, the adversarial CT images achieve a high SSIM score of 93.27\%, indicating strong structural preservation relative to the original scans. At the same time, the mean and maximum intensity differences remain limited (14.74 HU and 19.15 HU, respectively), suggesting that the perturbations introduce only subtle intensity variations that are difficult to perceive by visual inspection. Together, these results demonstrate that the proposed attack maintains high visual fidelity.
\begin{table}[H]
    \caption{Evaluation of Visual Adversarial Stealth}
       \vspace{0.5em}
    \label{tab:tab8}
    \centering
    \small
    %\resizebox{1.0\linewidth}{!}{
    \begin{tabular}{lccc}
        \Xhline{2\arrayrulewidth}
       \bfseries Attack Method 
        & \bfseries Mean HU Diff.
        & \bfseries Max. HU Diff. & \bfseries SSIM (\%)\\ 
        \midrule
        CRA-RG      &  14.74 &   19.15  &93.27 \\
        % add more rows as needed
        \Xhline{2\arrayrulewidth}
    \end{tabular}
%}
\end{table}


\clearpage
\newpage

\section{Adversarial Optimization of CT images}
The proposed attack is initialized with random noise, which is then iteratively optimized under anatomical and perceptual constraints to manipulate the generated report. As a result, the perturbations in the early optimization steps indeed resemble unstructured noise, which is expected given the initialization. The final adversarial perturbations are not equivalent to fixed random noise. Through iterative optimization, the perturbations become task-driven and exploit model-specific vulnerabilities, resulting in consistent, targeted changes to the generated reports. We show the evolution of adversarial images at different steps in Figure \ref{figg:fig7}.
\begin{figure*}[h!]
\centering
\includegraphics[width=1.0\linewidth]{Figures/fig7.png}
\caption{
Evolution of adversarial CT images across optimization steps.
}
% \vspace{-3mm}
\label{figg:fig7}
\end{figure*}

\end{document}
