\documentclass{midl} % Include author names

% The following packages will be automatically loaded:
% jmlr, amsmath, amssymb, natbib, graphicx, url, algorithm2e
% ifoddpage, relsize and probably more
% make sure they are installed with your latex distribution

\usepackage{mwe} % to get dummy images
\jmlrvolume{-- 194}
\jmlryear{2026}
\jmlrworkshop{Full Paper -- MIDL 2026}
\editors{Accepted for publication at MIDL 2026}


% ----- new packages -----
% \usepackage[T1]{fontenc}
\usepackage{booktabs}
\usepackage{multirow}
% \usepackage{ulem} % For underline formatting
\usepackage{bbding}
\usepackage[table]{xcolor}
% \usepackage{subcaption}
% \usepackage{amsmath}
% \usepackage{amssymb}
\usepackage{graphicx,verbatim}
% \usepackage{pifont}   % for \xmark
% \newcommand{\xmark}{\ding{55}}


\title[UnEBOLT]{UnEBOLT: A Unified Model for EEG-to-BOLD Translation and Functional Connectivity Reconstruction}

 % Use \Name{Author Name} to specify the name.
 % If the surname contains spaces, enclose the surname
 % in braces, e.g. \Name{John {Smith Jones}} similarly
 % if the name has a "von" part, e.g \Name{Jane {de Winter}}.
 % If the first letter in the forenames is a diacritic
 % enclose the diacritic in braces, e.g. \Name{{\'E}louise Smith}

 % Two authors with the same address
 % \midlauthor{\Name{Author Name1} \Email{abc@sample.edu}\and
 %  \Name{Author Name2} \Email{xyz@sample.edu}\\
 %  \addr Address}

 % Three or more authors with the same address:
 % \midlauthor{\Name{Author Name1} \Email{an1@sample.edu}\\
 %  \Name{Author Name2} \Email{an2@sample.edu}\\
 %  \Name{Author Name3} \Email{an3@sample.edu}\\
 %  \addr Address}


% Authors with different addresses:
% \midlauthor{\Name{Author Name1} \Email{abc@sample.edu}\\
% \addr Address 1
% \AND
% \Name{Author Name2} \Email{xyz@sample.edu}\\
% \addr Address 2
% }

%\footnotetext[1]{Contributed equally}

% More complicate cases, e.g. with dual affiliations and joint authorship
\midlauthor{\Name{Yamin Li\nametag{$^{1}$}}\Email{yamin.li@vanderbilt.edu}\\
\addr $^{1}$ Vanderbilt University \AND
% \addr $^{2}$ Address 2 \AND
\Name{Ange Lou\nametag{$^{1}$}} \Email{ange.lou@vanderbilt.edu}\\
\Name{Chang Li\nametag{$^{1}$}} \Email{chang.li@vanderbilt.edu}\\
\Name{Shiyu Wang\nametag{$^{1}$}} \Email{shiyu.wang.1@vanderbilt.edu}\\
% \addr $^{3}$ Address 3 \AND
\Name{Haatef 
Pourmotabbed\nametag{$^{1}$}} \Email{haatef.pourmotabbed@vanderbilt.edu}\\
% \addr $^{4}$ Address 4
\Name{Ziyuan Xu\nametag{$^{1}$}} \Email{ziyuan.xu@vanderbilt.edu}\\
\Name{Shengchao Zhang\nametag{$^{1, 2}$}} \Email{shengchao\_zhang@brown.edu}\\
\addr $^{2}$ Rhode Island Hospital (Brown University Health) \AND
\Name{Dario J. Englot\nametag{$^{1, 3}$}} \Email{dario.englot@vumc.org}\\
\addr $^{3}$ Vanderbilt University Medical Center \AND
\Name{Soheil Kolouri\nametag{$^{1}$}} \Email{soheil.kolouri@vanderbilt.edu}\\
\Name{Daniel Moyer\nametag{$^{1}$}} \Email{daniel.moyer@vanderbilt.edu}\\
\Name{Roza G. Bayrak\nametag{$^{1}$}} \Email{roza.g.bayrak@vanderbilt.edu}\\
\Name{Catie Chang\nametag{$^{1}$}} \Email{catie.chang@vanderbilt.edu}
}

\begin{document}

\maketitle

\begin{abstract}
Functional magnetic resonance imaging (fMRI) provides high-resolution, whole-brain dynamic information, but is costly and immobile, limiting its utility in low-resource settings. EEG-to-fMRI translation via deep learning offers a promising alternative, enabling access to deep brain activity from scalp EEG signals in naturalistic settings. However, current state-of-the-art methods for EEG-to-fMRI translation require training separate models for each brain region, limiting efficiency and scalability. 
%Whole-brain fMRI reconstruction remains largely unexplored. 
Here, we introduce \textbf{UnEBOLT}\footnote{\url{https://github.com/neurdylab/UnEBOLT}}, a \textbf{Un}ified model for \textbf{E}EG-to-\textbf{BOL}D \textbf{T}ranslation. UnEBOLT is an end-to-end framework that predicts whole-brain fMRI time series from EEG by adaptive multi-region decoding within a single model. This approach enables efficient and comprehensive inference while also reconstructing subject-specific functional connectivity matrices, a representation that provides insight into neuronal interactions and which has been successfully utilized for clinical biomarkers. Our results show that UnEBOLT achieves comparable performance to dedicated ROI-specific models while scaling to multi-region prediction. Additionally, the reconstructed fMRI time series enable functional connectivity estimation, which may have broad applications in neuroscience. 
\end{abstract}

\begin{keywords}
EEG, fMRI, EEG-to-fMRI synthesis
\end{keywords}

\section{Introduction}
Functional magnetic resonance imaging (fMRI) allows for studying whole-brain dynamics with high spatial resolution, providing insights into large-scale neural circuits that underlie cognition, perception, and behavior \cite{van2010exploring}. Functional connectivity (FC), which captures temporal correlations between blood-oxygen-level-dependent (BOLD) signals across different brain regions, has been instrumental in understanding brain networks in both healthy and clinical populations, making it a valuable tool for neuroscience and medical research.
However, the widespread use of fMRI is hindered by its high cost and immobility, limiting accessibility in neuroscience and medical settings. 

Electroencephalography (EEG), in contrast, is a portable and cost-effective modality with high temporal resolution. 
%, making it well-suited for capturing fast neural dynamics. 
However, EEG lacks the spatial specificity of fMRI, making it difficult to infer activity in deep brain structures or resolve large-scale connectivity patterns with the same precision \cite{cohen2017does,chang2021multimodal}. To bridge this gap, EEG-to-fMRI translation via deep learning has recently emerged as a promising direction, enabling the reconstruction of fMRI signals from scalp EEG recordings. Prior works such as \cite{liu2023fusing,liu2023latent,calhas2022eeg,lanzino2024nt,he2025spec2volcamu} have primarily focused on reconstructing spatial features of fMRI volumes and task activation maps. While these studies demonstrate promising whole-brain spatial reconstruction capability, they generally lack a systematic evaluation of temporal reconstruction fidelity, higher-order functional features such as functional connectivity (FC), or cross-dataset generalization.
% but did not quantitatively evaluate reconstructed time courses or connectivity matrices.
On the other hand, studies such as \cite{kovalev2022fmri,li2024leveraging} have investigated fMRI time series reconstruction in deep brain regions using sequence-to-sequence models, evaluating performance by computing the temporal correlation with the ground truth. However, these studies used small sample sizes and relied on subject-specific training, restricting their generalizability. 

Furthermore, most existing research has focused on EEG-to-fMRI synthesis during task-based and eyes-open resting-state conditions, leaving the fully eyes-closed resting state largely unexplored. Resting-state fMRI is of notable importance because it reflects the brain’s spontaneous and intrinsic activity in the absence of explicit task constraints, thereby capturing more diverse neural dynamics that are more challenging to decode. Moreover, resting-state paradigms impose minimal cognitive or behavioral demands on participants and are therefore widely adopted across large-scale and clinical neuroimaging studies, including populations for whom task-based experiments may be impractical or unreliable. A recent advancement, NeuroBOLT~\cite{NEURIPS2024_Li}, introduced a framework that learns multi-dimensional representations of EEG windows to predict corresponding fMRI data points, achieving state-of-the-art performance on
%in modeling fMRI temporal characteristics for 
eyes-closed resting-state data. However, NeuroBOLT, along with \cite{kovalev2022fmri,li2024leveraging}, 
%are constrained by a one-model-per-ROI paradigm, 
require separate models to be trained for each brain region, which is computationally intensive for whole-brain inference. Additionally, only a small subset of brain regions was examined in these studies \cite{kovalev2022fmri,li2024leveraging,NEURIPS2024_Li}, leaving open the question of EEG’s predictive power for fMRI signals across the whole brain. These limitations 
%hinder both the generalization and scalability of current EEG-to-fMRI translation methods, 
highlight the need for a more efficient, whole-brain approach capable of learning shared representations across regions while preserving fMRI's rich temporal structure.

Here, we introduce a novel and scalable EEG-to-fMRI translation framework for whole-brain inference. The key contributions of this work are: 
\begin{itemize}
\item  \textbf{Efficient end-to-end EEG-to-fMRI translation framework.} Our framework reconstructs whole-brain fMRI dynamics within a single model, significantly reducing computational overhead while maintaining signal reconstruction fidelity. 
\item \textbf{Adaptive Multi-Region Decoding.} Our model dynamically refines ROI-specific representations by conditioning fMRI predictions on global EEG features, ensuring both regional specificity and whole-brain coherence. A multi-objective loss function integrates temporal and spatial correlation constraints, preserving neural dynamics and functional relationships across the brain. 
\item \textbf{Cortex-wide evaluations across multiple regions and conditions.} We perform extensive experiments on both subject-specific and cross-subject learning at the ROI level and the brain-network level. Further, we assess functional connectivity (FC) reconstruction at multiple scales, making this the first deep learning model to be evaluated on FC recovery, and additionally evaluate zero-shot generalization to task-condition datasets collected under different acquisition settings.
\end{itemize}



\section{Methods}
\subsection{Problem Definition}
\label{task_formulation}
Four-dimensional fMRI data are often summarized using brain parcellation techniques that average voxel-wise signals within predefined regions-of-interest (ROIs). This approach offers a practical trade-off between spatial resolution and computational efficiency, yielding higher signal-to-noise ratio (SNR) and more stable time courses while retaining the organizational structure of the brain. The resulting parcellated fMRI can be expressed as $Y \in \mathbb{R}^{P \times K}$, where each row corresponds to the time series of one of the $P$ ROIs and $K$ denotes the total number of fMRI time points. EEG windows spanning a duration $T$ approximating the hemodynamic response function (HRF) before each fMRI frame are extracted as input, represented as $X\in\mathbb{R}^{C\times T}$, where $C$ is the number of electrodes. Thus, given the model $f_\theta(\cdot)$, the fMRI prediction is formulated as $\hat{Y}_{t}= f_\theta(X_{t-T:t-1})$, where $\hat{Y}_{t}\in \mathbb{R}^P$ is the reconstructed fMRI frame at time index $t$.

\begin{figure}[t]
\includegraphics[width=\textwidth]{figures/figure1_new.png}
\caption{UnEBOLT pipeline. 
We tokenize EEG windows into patches and use our encoder to extract spatiotemporal and spectral embeddings, which are adaptively fused and refined by the ROI-specific representation learning module for prediction.
% EEG windows are tokenized into patches and processed by the encoder to extract spatiotemporal and spectral embeddings, which are adaptively fused and refined by the ROI-specific representation learning module for final prediction.}
}
\label{fig1}
\end{figure}



\subsection{Model Architecture}
We propose an end-to-end framework, UnEBOLT, for predicting multi-region fMRI signals from EEG recordings. Our approach learns a comprehensive mapping from EEG to fMRI data while preserving the inter-regional dependencies characteristic of fMRI data. The framework is composed of three key modules: (1) a \textbf{Multi-dimensional Encoder}; (2) a \textbf{Gated Adaptive Fusion Module}; and (3) an \textbf{ROI-specific Representation Learning Module}, a decoder that comprises three key components: (i) a Learnable ROI Lookup Table, (ii) an ROI Representation Embedder, and (iii) an ROI Prediction Head. Together, these modules translate EEG-derived features to region-aware fMRI predictions.

\textbf{Multi-dimensional Encoder.}
We leverage the multi-dimensional EEG encoder \cite{NEURIPS2024_Li} as the backbone to extract complementary \textit{spatiotemporal} and \textit{multi-scale spectral} embeddings from raw EEG signals $X \in \mathbb{R}^{C \times T}$. \textbf{Spatiotemporal Features:}  
We first segment $X$ using a window of length $w$ and a stride of $w$, yielding patches $x_{c,k} \in \mathbb{R}^w$ for each channel $c = 1, \dots, C$ and patch index $k = 1, \dots, \lfloor T/w \rfloor$. Each patch is mapped to a $d$-dimensional embedding $e_{c,k}$ through a convolutional temporal encoder and further enriched with trainable temporal ($te_k$) and spatial ($se_c$) embeddings: $e_{\text{st}}^{c,k} = e_{c,k} + te_k + se_c$.
These embeddings are processed by a Transformer encoder and subsequently average-pooled to form the spatial–temporal representation $r_{\text{st}} \in \mathbb{R}^{d}$. \textbf{Spectral Features:}  
In parallel to the spatiotemporal pathway, we incorporate a multi-scale spectral embedder to capture frequency-domain characteristics of EEG that are tightly linked to neural oscillatory dynamics. EEG contains meaningful information at multiple frequency bands (e.g., delta, theta, alpha, beta), and using a single spectral scale risks missing fine- or coarse-grained patterns. To address this, we compute spectral representations at multiple adaptive window sizes.
Specifically, we apply the short-time Fourier transform (STFT) to $X$ at several scales. Given the base window scale $w_b$, at each level $l$ (with window $w_l = w_b \times 2^l$), we obtain patches $x_{l,c,k} \in \mathbb{R}^{w_l}$ and compute their FFT to yield spectra $s_{l,c,k} \in \mathbb{R}^{\frac{w_l}{2}+1}$ with $k=1,\dots,\lfloor T/w_{l} \rfloor$. These multi-level spectra are processed through frequency and temporal embedding modules, generating window embeddings $we_l$ with the same shape, which are then integrated to form the final spectral embedding: $e_{\text{sp}} = \sum_{l=0}^{L} we_{l} \in \mathbb{R}^{C \times n \times d}.$
For each EEG channel, we apply positional embeddings, followed by a linear Transformer encoder \cite{katharopoulos-et-al-2020} and average pooling operation, producing the final spectral representation: $r_{\text{sp}} \in \mathbb{R}^{d}$.

\textbf{Gated Adaptive Fusion (GAF).}
We introduce the GAF module to integrate the complementary features
 $r_{\text{st}}, r_{\text{sp}} \in \mathbb{R}^{d}$ derived from the EEG encoder. 
%Let $r_{\text{st}} \in \mathbb{R}^{d}$ denote the spatiotemporal representation and $r_{\text{sp}} \in \mathbb{R}^{d}$ the spectral representation. 
The fused representation is computed as:
$r_{\text{fused}} = g \cdot r_{\text{st}} + (1 - g) \cdot r_{\text{sp}},$
where the gating factor $g$ is dynamically learned via a small multilayer perceptron (MLP). The two embeddings are first concatenated to form $r_{\text{cat}} \in \mathbb{R}^{2d}$, which is then processed by the MLP $g = \sigma\Big(W_2 \, \text{GELU}\big(W_1 \, r_{\text{cat}}\big)\Big)$,
% \begin{equation}
% g = \sigma\Big(W_2 \, \text{GELU}\big(W_1 \, r_{\text{cat}}\big)\Big),
% \end{equation}
where $W_1$ and $W_2$ are learnable weight matrices and $\sigma$ is the sigmoid activation function. This adaptive gating mechanism emphasizes the most informative features from each dimension. 

\textbf{Learnable ROI Lookup Table (LROI-LUT).}
LROI-LUT is a trainable embedding that captures region-specific features while incorporating global EEG context. The LROI-LUT consists of $P$ learnable embeddings with random initialization, each corresponding to a predefined ROI. 
%These embeddings are randomly initialized and updated during training. 
During training, the global EEG representation $r_{\text{fused}}$ is concatenated with each ROI embedding $r_i \in \mathbb{R}^{d_r}$ from the lookup table: $e_i = \big[r_{\text{fused}}; \, r_i\big]$, for all $ i \in \{1, \dots, P\}$,
adaptively conditioning each ROI on the EEG signal.
% This operation ensures that each ROI is adaptively conditioned on the EEG signal. 
The concatenated embedding $e_i$ is then passed to the RRE (described next) to transform the EEG-domain information into an ROI-aware representation.

\textbf{ROI Representation Embedder (RRE).}
RRE is a lightweight LoRA-like adapter \cite{hu2022lora,wang2024mindbridge} that transforms EEG-informed embeddings into a space aligned with fMRI representations. It compresses high-dimensional features into a lower-dimensional bottleneck, then restores them to the original space, optionally incorporating a residual connection for stability. Let $e \in \mathbb{R}^{D}$ denote an input concatenated embedding (i.e., one of the $e_i$). The process is as follows: \textbf{(1)} The input embedding is passed through a down-projection layer to obtain a bottleneck embedding $e_b = \text{ReLU}\Big(\text{Linear}_{\text{down}}(e)\Big)$.
    % \begin{equation}
    % e_b = \text{ReLU}\Big(\text{Linear}_{\text{down}}(e)\Big).
    % \end{equation}
\textbf{(2)} 
% After applying dropout for regularization, the bottleneck embedding is up-projected to restore the embedding $\hat{e}$ to the original dimension 
After applying dropout for regularization, the bottleneck embedding is up-projected back to its original dimensionality using the transformation $\hat{e} = \text{Linear}_{\text{up}}(e_b)$.
    % \begin{equation}
    % \hat{e} = \text{Linear}_{\text{up}}(e_b).
    % \end{equation}
\textbf{(3)} A learnable scaling factor $\alpha$ modulates the up-projected features, and the final output is computed with a residual connection:
    $e_{\text{out}} = \alpha \cdot \hat{e} + e.$
    Finally, the refined ROI-specific embeddings are processed by a set of \textbf{ROI-specific linear prediction heads} to predict the final fMRI signal.


\textbf{Multi-Objective Loss.}
For predicting multi-region signals and reconstructing FC, capturing inter-regional interactions is crucial, as reconstructing individual ROIs may overlook these dependencies. To address this, we enforce biologically plausible constraints by integrating mean squared error (MSE), temporal correlation loss $\mathcal{L}_{\text{tcorr}}$, and spatial correlation loss $\mathcal{L}_{\text{scorr}}$ into a single objective:
\[
\mathcal{L}_{\text{MO}} = \alpha\, \mathcal{L}_{\text{MSE}} + \beta\, \left(\frac{1}{P}\sum_{p=1}^{P}\Bigl(1-R_{\text{t},p}\Bigr)\right) + (1-\alpha-\beta)\, \left(\frac{1}{B}\sum_{b=1}^{B}\Bigl(1-R_{\text{s},b}\Bigr)\right),
\]
where \(\alpha\) and \(\beta\) control the contributions of each term, \(R_{\text{t},p}\) is the Pearson correlation coefficient between the predicted and true signals computed along the temporal axis (per ROI) for the \(p\)th time series, and \(R_{\text{s},b}\) is the corresponding coefficient computed along the spatial axis (per time point) for the \(b\)th batch element. During training, the time points within each batch remain in their original temporal order so that the temporal loss can be computed, while the order of the batches themselves is shuffled to prevent overfitting to sequential patterns.
% For predicting multi-region signals and reconstructing FC, capturing the interaction between brain regions is crucial. Previous approaches that seek to reconstruct individual ROIs may fail to capture such dependencies. 
% %often rely on direct signal comparisons, but they may fail to preserve the complex spatiotemporal dependencies inherent in brain activity. 
% To address this, we integrate spatial ($scorr$) and temporal correlation ($tcorr$) losses alongside mean squared error (MSE) loss to enforce biologically plausible constraints and improve prediction fidelity, each loss weighted by a corresponding hyperparameter:
% \begin{equation}
%     \mathcal{L}_{\text{MO}} = \alpha \mathcal{L}_{\text{MSE}}+\beta \mathcal{L}_{\text{tcorr}}+(1-\alpha-\beta)\mathcal{L}_{\text{scorr}},
% \end{equation}
% where $\alpha$ and $\beta$ control the relative contributions of each loss term. During training, time points within each batch are preserved in their original temporal order to compute temporal correlation loss, while batch order is shuffled to prevent overfitting to sequential patterns. The correlation losses are defined as:

% \begin{equation}
%     \textstyle\mathcal{L}_{\text{tcorr}} = \frac{1}{P} \sum_{p=1}^{P} \left(1 - R_{\text{t},p} \right); \mathcal{L}_{\text{scorr}} = \frac{1}{B} \sum_{b=1}^{B} \left(1 - R_{\text{s},b} \right),
% \end{equation}
% where \( R_{t} \) and \( R_{s} \) are Pearson correlation coefficients between the predicted and true fMRI time series (\(\hat{Y}\) and \(Y \in \mathbb{R}^{P \times B}\)). Specifically, \( R_{t} \) is computed along the \textit{temporal } axis per ROI, while \( R_{s} \) is computed along the \textit{spatial} axis per time point.








\section{Experiments and Analysis}
\subsection{Dataset and Experimental Settings}

\paragraph{Dataset} We conducted our experiments on the shared resting-state EEG-fMRI dataset from \cite{NEURIPS2024_Li}, following the same pre-processing pipelines. 
This dataset comprises 29 simultaneous EEG-fMRI scans from 22 healthy volunteers in an eyes-closed resting state, with 7 participants having two scans.
Each scan lasts 20 minutes.
% This dataset comprises simultaneous EEG and fMRI recordings from 22 healthy volunteers in an eyes-closed resting state, with 7 participants having two scans, yielding a total of 29 scans, each lasting 20 minutes. 
Scalp EEG was recorded using a 32-channel MR-compatible system (10-20 system), with 26 channels retained after excluding ECG, EOG, and EMG channels.
% with ECG, EOG, and EMG channels excluded, leaving 26 channels for analysis. 
The Dictionaries of Functional Modes (DiFuMo) atlas \cite{dadi2020fine} with $n=64$, 128, and 256 ROIs is used to extract fMRI signals.
% For fMRI signal extraction, we employed functional parcellation using the Dictionaries of Functional Modes (DiFuMo) \cite{dadi2020fine}, with n=64, 128, 256 ROIs. 
We extract a 16-second EEG window, which is resampled to 200\,Hz, prior to each fMRI time point as the model's input for fMRI frame prediction. Further details of the EEG and fMRI preprocessing steps can be found in the original NeuroBOLT paper \cite{NEURIPS2024_Li}.
% EEG windows of 16 seconds before each fMRI time point were extracted to serve as model input for predicting the corresponding fMRI frame.

%\subsubsection{Implementation Details}
% We conduct our experiments in Pytorch 2.0.1 \cite{paszke2019pytorch}, Python 3.9.12, on a single NVIDIA RTX A5000 GPU with CUDA 11.8. The batch sizes are 16 and 64 for intra-scan and cross-subject analyses, respectively. We train our model for 20 epochs using AdamW optimizer, with an initial learning rate of 3e-4, a weight decay of 0.05, and a minimal learning rate of 1e-6. The EEG embedding and RRE bottleneck dimensions are set to 200 and 128, respectively. $d_r$ is set equal to the number of ROI. MO loss weights are set to $\alpha=0.8$ and $\beta=0.1$. Metrics for evaluating the FC reconstruction are calculated only for the upper (or lower) triangular part of the FC matrix, excluding the diagonal. For train/validation/test splits, we follow the exact same dataset partitioning as \cite{NEURIPS2024_Li} to ensure consistency. For subject-specific predictions, an 8:1:1 split is used with 20 second gaps between sets to prevent data leakage. For cross-subject analysis, the dataset is divided in a 3:1:1 ratio. The spatiotemporal module in the EEG encoder is initialized with pretrained LaBraM-base model \cite{jiang2024large}, using a token length of 200 (1 second, no overlap). For the multi-scale spectral module in the EEG encoder, we set the smallest scale size $l_0=100$, i.e., 0.5 seconds without overlap. For a fair comparison, all reproduced baselines are implemented using their official code. 
% %To ensure reproducibility and foster future research, 
% Our code will be released upon acceptance.

\paragraph{Implementation Details} We conduct our experiments using PyTorch 2.0.1~\cite{paszke2019pytorch} and Python 3.9.12 on a single NVIDIA RTX A5000 GPU (CUDA 11.8) with batch sizes of 16 for intrascan and 64 for cross-subject analyses, training for 20 epochs using AdamW (initial learning rate \(3\times10^{-4}\), weight decay 0.05, minimum learning rate \(1\times10^{-6}\)). The EEG embedding and RRE bottleneck dimensions are 200 and 128, respectively, with \(d_r\) equal to the number of ROIs, and MO loss weights \(\alpha=0.8\) and \(\beta=0.1\). FC reconstruction metrics are computed on the upper (or lower) triangular portion of the matrix, excluding the diagonal. We follow the data partitioning in~\cite{NEURIPS2024_Li}: an 8:1:1 split with 20-second gaps for intrascan predictions and a 3:1:1 ratio for cross-subject analysis. For subjects with multiple scans, all scans from the same individual are assigned to the same split to avoid information leakage during the cross-subject analysis. The EEG encoder's spatiotemporal module is initialized with the pretrained LaBraM-base model~\cite{jiang2024large} (token length 200, 1 second without overlap), and the multi-scale spectral module uses a base window scale \(w_b=100\) (0.5 seconds without overlap). 
We use the officially released codebases of previous models for a fair comparison. Our code is available at \url{https://github.com/neurdylab/UnEBOLT}.
% For a fair comparison, all reproduced baselines use their official code, and our code will be released upon acceptance.



\subsection{Results}
We conduct experiments in two scenarios: \textbf{intrascan prediction}, 
where we train and test our model on the first 16 minutes and the last 2 minutes of the same scan, respectively;
and \textbf{unseen subject prediction}, where the model is trained on a training set (18 scans) and evaluated on 6 held-out scans for full-scan (20 min) reconstruction.
\begin{figure}[t]
\includegraphics[width=\textwidth]{figures/figure2.png}
\caption{(a) Unseen subject prediction performance compared with ROI-specific baselines. [Bars: mean temporal $R$, error bars: S.D.; *$p<0.05$, **$p<0.01$, ***$p<0.001$: the paired $t$-test significance between our model and each baseline. Cu: cuneus, He: Heschl's gyrus, MF: middle frontal gyri, PA: anterior precuneus, Pu: putamen, Th: thalamus]. (b) Prediction accuracy
%Temporal $R$ between predicted and true signal 
in various brain regions. (c) Intrascan (left) and unseen subject (right) predictions. Each point represents the mean temporal $R$ across all scans for a given ROI.}
\label{fig2}
\end{figure}
We evaluated the model’s performance in two key aspects: (1) time course prediction, quantified as the temporal correlation ($R$) between predicted and real signals; (2) FC reconstruction, assessed by comparing FC metrics derived from the generated and true signals, including pixel-wise correlation (Pixcorr), connectivity MSE (ConnMSE) to measure structural deviations in network topology, and F1 scores for edge detection, measuring how well the model identifies the top 25\% and 50\% of the strongest connections.
%in FCs. Unless specified otherwise, the experiments are conducted with P=64.



%\subsubsection{Performance Comparison}
\subsubsection{Performance Comparison} 
We compare our model with the state-of-the-art (SOTA) baselines in EEG encoding \cite{jiang2024large,yang2023biot,li2022motor,song2021transformer,peh2022transformer} and EEG-to-fMRI translation \cite{NEURIPS2024_Li,li2024leveraging,kovalev2022fmri}. Since all baseline models are ROI-specific models (RSM), we first evaluate our unified model (UM) against RSM baselines in reconstructing fMRI signals across the brain regions selected in \cite{NEURIPS2024_Li}, following the same benchmarking protocol. As shown in Fig.~\ref{fig2}(a), our framework, trained jointly on 64 brain regions within a single architecture, achieves competitive reconstruction performance while being significantly more efficient compared with iterative ROI-specific modeling.
% Training UnEBOLT with 64 ROIs takes 58.3 minutes, whereas NeuroBOLT requires 46.4 minutes per ROI, making our approach ~51 times faster while maintaining accuracy comparable to models trained separately for each region.

% UnEBOLT with 64 ROIs trains in 58.3 minutes, making it about 51 times faster than NeuroBOLT \cite{NEURIPS2024_Li} (46.4min per ROI), while maintaining comparable accuracy with models trained separately for each region.


\begin{table}[t]
\caption{Merged comparison and ablation studies ($P=64$), and model performance across different numbers of ROIs, reported as Mean{\scriptsize(std)}. \textbf{Bold}: the best mean value; gray: previous SOTA; blue: architecture ablation; green: loss ablation; yellow: our full model.}
\centering
\footnotesize
\resizebox{\textwidth}{!}{%
\begin{tabular}{c l c c c c c c}
\toprule
 & \textbf{Method} & \textbf{Multi-Region} & \textbf{Tcorr} & \textbf{Pixcorr} & \textbf{ConnMSE} & \textbf{F1-0.25} & \textbf{F1-0.5} \\
\midrule
\multirow{5}{*}{\rotatebox{90}{\texttt{intra (P=64)}}} 
  & \cellcolor{gray!20}BIOT \cite{yang2023biot}     & \XSolidBrush & 0.339{\scriptsize(0.169)} & 0.396{\scriptsize(0.206)} & 0.224{\scriptsize(0.134)} & 0.421{\scriptsize(0.090)} & 0.635{\scriptsize(0.068)} \\
  & \cellcolor{gray!20}LaBraM \cite{jiang2024large}   & \XSolidBrush & 0.184{\scriptsize(0.164)} & 0.248{\scriptsize(0.226)} & 0.381{\scriptsize(0.229)} & 0.366{\scriptsize(0.104)} & 0.591{\scriptsize(0.077)} \\
  & \cellcolor{gray!20}Li et al. \cite{li2024leveraging}  & \XSolidBrush & 0.391{\scriptsize(0.195)} & 0.451{\scriptsize(0.224)} & \textbf{0.154{\scriptsize(0.101)}} & 0.466{\scriptsize(0.104)} & 0.660{\scriptsize(0.074)} \\
  & \cellcolor{gray!20}NeuroBOLT \cite{NEURIPS2024_Li} & \XSolidBrush & 0.369{\scriptsize(0.164)} & 0.402{\scriptsize(0.213)} & 0.259{\scriptsize(0.182)} & 0.427{\scriptsize(0.088)} & 0.641{\scriptsize(0.070)} \\
  & \cellcolor{yellow!20}Ours (P=64)     & \(\checkmark\) & \textbf{0.416{\scriptsize(0.166)}} & \textbf{0.490{\scriptsize(0.176)}} & 0.201{\scriptsize(0.148)} & \textbf{0.474{\scriptsize(0.067)}} & \textbf{0.672{\scriptsize(0.062)}} \\
\midrule
\multirow{17}{*}{\rotatebox{90}{\texttt{unseen subject prediction (P=64)}}} 
  & \cellcolor{gray!20}ST-Transformer \cite{song2021transformer}  & \XSolidBrush  & 0.107{\scriptsize(0.077)}  & 0.295{\scriptsize(0.132)}  & 0.244{\scriptsize(0.025)}  & 0.368{\scriptsize(0.062)}  & 0.593{\scriptsize(0.057)} \\
  & \cellcolor{gray!20}CNN-Transformer \cite{peh2022transformer}   & \XSolidBrush  & 0.105{\scriptsize(0.096)}  & 0.226{\scriptsize(0.113)}  & 0.281{\scriptsize(0.043)}  & 0.343{\scriptsize(0.040)}  & 0.576{\scriptsize(0.060)} \\
  & \cellcolor{gray!20}FFCL \cite{li2022motor}                      & \XSolidBrush  & 0.190{\scriptsize(0.067)}  & 0.245{\scriptsize(0.049)}  & 0.136{\scriptsize(0.115)}  & 0.401{\scriptsize(0.012)}  & 0.605{\scriptsize(0.024)} \\
  & \cellcolor{gray!20}BIOT \cite{yang2023biot}                      & \XSolidBrush  & 0.413{\scriptsize(0.035)}  & 0.486{\scriptsize(0.135)}  & 0.098{\scriptsize(0.055)}  & 0.444{\scriptsize(0.055)}  & 0.626{\scriptsize(0.046)} \\
  & \cellcolor{gray!20}LaBraM \cite{jiang2024large}                    & \XSolidBrush  & 0.214{\scriptsize(0.058)}  & 0.393{\scriptsize(0.089)}  & 0.122{\scriptsize(0.068)}  & 0.460{\scriptsize(0.076)}  & 0.611{\scriptsize(0.049)} \\
  & \cellcolor{gray!20}BEIRA \cite{kovalev2022fmri} & \XSolidBrush &
  0.148{\scriptsize(0.102)} & 0.389{\scriptsize(0.159)} & 0.293{\scriptsize(0.055)} &
  0.380{\scriptsize(0.067)} & 0.597{\scriptsize(0.048)}\\
  & \cellcolor{gray!20}Li et al. \cite{li2024leveraging} & \XSolidBrush &
  0.370{\scriptsize(0.038)} & 0.483{\scriptsize(0.109)} & 0.080{\scriptsize(0.047)} &
  0.472{\scriptsize(0.056)} & 0.642{\scriptsize(0.053)}\\
  & \cellcolor{gray!20}NeuroBOLT \cite{NEURIPS2024_Li}               & \XSolidBrush  & 0.413{\scriptsize(0.052)}  & 0.428{\scriptsize(0.145)}  & 0.126{\scriptsize(0.080)}  & 0.475{\scriptsize(0.057)}  & 0.637{\scriptsize(0.053)} \\
\cmidrule(lr){2-8}
  & \cellcolor{blue!20}w/o ROI-specific head      & \(\checkmark\)  & 0.407{\scriptsize(0.043)} & 0.217{\scriptsize(0.093)} & 0.201{\scriptsize(0.093)} & 0.333{\scriptsize(0.029)} & 0.576{\scriptsize(0.036)} \\
  & \cellcolor{blue!20}w/o ROI embedder           & \(\checkmark\)  & 0.417{\scriptsize(0.050)} & 0.484{\scriptsize(0.097)} & 0.096{\scriptsize(0.069)} & 0.533{\scriptsize(0.054)} & 0.667{\scriptsize(0.041)} \\
  & \cellcolor{blue!20}w/o LROI-LUT           & \(\checkmark\)  & 0.410{\scriptsize(0.045)} & 0.480{\scriptsize(0.103)} & 0.096{\scriptsize(0.065)} & 0.509{\scriptsize(0.018)} & 0.670{\scriptsize(0.042)} \\
  & \cellcolor{blue!20}w/o GAF                    & \(\checkmark\)  & 0.407{\scriptsize(0.049)} & 0.541{\scriptsize(0.095)} & 0.072{\scriptsize(0.055)} & 0.544{\scriptsize(0.067)} & 0.684{\scriptsize(0.043)} \\
\cmidrule(lr){2-8}
  & \cellcolor{green!20}only mse loss              & \(\checkmark\)  & 0.400{\scriptsize(0.029)} & 0.464{\scriptsize(0.098)} & 0.099{\scriptsize(0.069)} & 0.513{\scriptsize(0.035)} & 0.668{\scriptsize(0.033)} \\
  & \cellcolor{green!20}w/o mse loss               & \(\checkmark\)  & 0.392{\scriptsize(0.071)} & 0.510{\scriptsize(0.113)} & 0.086{\scriptsize(0.059)} & 0.496{\scriptsize(0.068)} & 0.668{\scriptsize(0.043)} \\
  & \cellcolor{green!20}w/o scorr loss             & \(\checkmark\)  & 0.402{\scriptsize(0.052)} & 0.480{\scriptsize(0.120)} & 0.114{\scriptsize(0.072)} & 0.516{\scriptsize(0.033)} & 0.672{\scriptsize(0.035)} \\
  & \cellcolor{green!20}w/o tcorr loss             & \(\checkmark\)  & 0.401{\scriptsize(0.064)} & 0.498{\scriptsize(0.099)} & \textbf{0.071{\scriptsize(0.047)}} & 0.520{\scriptsize(0.050)} & 0.678{\scriptsize(0.037)} \\
  & \cellcolor{yellow!20}Ours (P=64)                  & \(\checkmark\)  & \textbf{0.418{\scriptsize(0.036)}}  & \textbf{0.549{\scriptsize(0.120)}}  & 0.092{\scriptsize(0.066)}         & \textbf{0.545{\scriptsize(0.074)}}  & \textbf{0.690{\scriptsize(0.047)}} \\
  % \cmidrule(lr){2-8}
  \midrule
  & \cellcolor{yellow!20}Ours (P=128)  & \(\checkmark\) & 0.386{\scriptsize(0.044)}   & 0.560{\scriptsize(0.113)}   & 0.103{\scriptsize(0.071)}   & 0.516{\scriptsize(0.050)}   & 0.678{\scriptsize(0.038)} \\
  & \cellcolor{yellow!20}Ours (P=256)  & \(\checkmark\) & 0.360{\scriptsize(0.047)}   & 0.621{\scriptsize(0.083)}   & 0.152{\scriptsize(0.065)}   & 0.511{\scriptsize(0.042)}   & 0.684{\scriptsize(0.035)} \\
\bottomrule
\end{tabular}%
}
\label{tab:merged_total}
\end{table}






To extend the comparison to a whole-brain analysis, we adapt the RSM baselines by modifying the final projection layer to map the latent embeddings to the entire set of ROIs instead of a single region, thereby constructing UM baselines. 
% We adapt RSM baselines to construct UM baselines for whole-brain analysis by modifying final projection layers to map latent embeddings to the entire set of ROIs instead of a single region.
UnEBOLT outperforms all baselines in signal prediction and FC reconstruction across intrascan and unseen subject settings (top two sections of Table~\ref{tab:merged_total}). These results suggest that using a single final projection layer in the baseline backbone is not sufficient to capture dependencies among ROIs, highlighting the importance of a structured multi-region learning approach. 

Fig.~\ref{fig2}(b) shows the spatial distribution of prediction accuracy 
%(temporal $R$ between prediction and g.t.) 
of our model. To further analyze the prediction accuracy across functional networks (Fig.~\ref{fig2}(c)), each ROI is mapped to its corresponding functional network, as defined by Yeo’s 7-network parcellation \cite{yeo2011organization}. 
% Across both conditions, 
The Somatomotor Network exhibits the highest prediction accuracy in both conditions, followed by the Dorsal Attention and Visual networks. 
Additionally, intrascan predictions exhibit greater individual variability, while unseen subject predictions achieve higher overall accuracy. This trend may be attributed to the shorter temporal window used for intrascan evaluation, which limits temporal coverage and constrains the range of brain states observed during training, leading to higher variability in the estimated representations. Perhaps as a result, model performance under this setting is more sensitive to the specific brain state during the testing interval. In contrast, unseen-subject prediction is evaluated on full-length scans, possibly yielding more stable and reliable estimates.
% likely benefiting from training on a larger dataset, which improves model generalization. 
% Intrascan predictions have greater individual variability while unseen subject predictions attain higher overall accuracy.
% We speculate that the improved generalization results from training on a larger dataset.
Interestingly, the Visual Cortex exhibits higher predictability in the intrascan setting, suggesting that subject-specific training may better capture individual visual-process variations.

\begin{figure}
\includegraphics[width=\textwidth]{figures/fig4_networkwise.png}
\caption{Network-wise fMRI time-series reconstruction performance on unseen scans.
(a) Prediction performance across functional networks; each dot corresponds to an individual unseen scan.
(b) Representative reconstruction examples, with each subplot showing one example drawn from each network.
}
\label{fig4}
\end{figure}

Beyond the ROI-wise analysis, we also examined whether the model can recover coherent dynamics at the network level, rather than only at the individual ROI level. For each functional network, we averaged the fMRI time series across all ROIs belonging to that network to obtain a representative network-level signal, and evaluated how well the network-level signal reconstructed from our model matches the corresponding ground-truth trajectory. This analysis allows us to assess whether UnEBOLT preserves mesoscale functional organization and whether the predicted regional signals collectively recover higher-level network dynamics. Fig.~\ref{fig4} summarizes the network-level reconstruction performance. Panel (a) shows the distribution of reconstruction accuracy across networks for unseen scans, while panel (b) presents example time-series visualizations, each depicting the best prediction instance within a given network. Consistent with our ROI-wise findings, UnEBOLT accurately recovers temporal fluctuations at the network scale, demonstrating that the model captures not only localized region-level patterns but also their coordinated network-level interactions. Across networks, the Somatomotor (SomMot) network again exhibits the highest reconstruction accuracy, followed by the Dorsal Attention and Visual networks. This ordering closely parallels the ROI-wise results in Fig.~\ref{fig2}, and is consistent with prior EEG–fMRI studies showing that sensory–motor and visual systems display some of the strongest electrophysiology–hemodynamic coupling during rest \cite{xavier2025consistency}. Overall, these findings highlight that UnEBOLT learns representations that generalize from fine-grained ROI predictions to coherent, mesoscale network dynamics.





\begin{figure}
\includegraphics[width=\textwidth]{figures/fig3.jpg}
\caption{Examples of whole-scan FC reconstruction from predictions on an unseen subject across varying ROI resolutions and sparsity ratios. Top row: ground-truth FC; bottom row: predicted FC. ROIs are grouped by functional network and ordered contiguously within each group, with network membership indicated by the accompanying color bars.
%across 64, 128, 256 ROIs with sparsity ratios of 0.25 and 0.5. 
%ROI: 64, 128, 256. Threshold ratio: 0.25, 0.5.
}
\label{fig3}
\end{figure}


The final section of Table~\ref{tab:merged_total} evaluates the model's performance in predicting unseen subjects' fMRI data across different levels of spatial granularity (i.e., varying numbers of ROIs; example visualization: Fig.~\ref{fig3}).
%and an example visualization is shown in Fig.\ref{fig3}. 
As illustrated in Fig.~\ref{fig3}, the recovered FC matrices closely resemble the ground truth across different parcellation levels and sparsity thresholds. For coarser parcellations (P=64), the block structure of canonical functional networks is clearly recovered, yielding high correspondence with an F1 score up to 0.77 at the 0.50 threshold.
Predicting time series for a larger number of ROIs is more challenging, as reflected in lower Tcorr and F1 at the 0.25 connectivity threshold together with higher ConnMSE, likely due to increased complexity and reduced signal-to-noise ratio in finer parcellations. 
% The decrease in Tcorr, ConnMSE, and F1 for the 0.25 connectivity threshold shows that time series prediction for a large number of ROIs is more challenging, likely due to increased complexity and reduced signal-to-noise ratio.
Interestingly, however, higher ROI resolution improves the spatial structure prediction of FC, as indicated by increasing Pixcorr. 
%this may result from better alignment with ground-truth connectivity patterns, 
%where finer parcellations provide more localized representations of brain activity. However, the increased number of ROIs may introduce higher variability and noise, making it harder to accurately model temporal dependencies, leading to an overall trade-off between spatial precision and temporal consistency.

% \begin{figure}[h]
% \includegraphics[width=\textwidth]{figures/fig3.jpg}
% \caption{Visualization of FC reconstruction from predicted signals of an unseen subject across 64, 128, 256 ROIs with sparsity ratios of 0.25 and 0.5. Top row: ground truth. Bottom row: predicted FC matrices.}
% \label{fig3}
% \end{figure}
% %The figure compares ground truth (top row) and predicted (bottom row) functional connectivity matrices }





%\subsubsection{Ablation Studies}
\subsubsection{Ablation Studies} 
% We compare our full model with its ablated variants in Table \ref{tab:merged_multi_colored}. 
Compared with ablated variants, our full model achieves the highest performance across most metrics (Table \ref{tab:merged_total}). %Notably, 
Removing the ROI-specific projection head significantly degrades performance, highlighting its role in capturing region-specific information. Excluding the ROI embedder also substantially reduces Pixcorr, indicating its importance in maintaining FC structure. While the model can still capture temporal dynamics, the drop in Pixcorr suggests the embedder refines spatial organization. 
Omitting the spatial loss term further weakens the spatial relationships between ROIs. Overall, these findings underscore the necessity of each component in enabling UnEBOLT to jointly model regional specificity and cortical structure, ultimately supporting accurate time-series prediction and FC recovery.
% Omitting the spatial loss term further weakens spatial relationships between ROIs. 
%, increasing deviations in the reconstructed connectivity matrix.


\subsubsection{Model Generalization Analysis}
The present study primarily focuses on resting-state fMRI synthesis. To further assess the generalizability of the pretrained model to data acquired under different experimental conditions, we additionally evaluate the model on an auditory task-based EEG–fMRI dataset introduced in \cite{NEURIPS2024_Li}. This dataset was collected at different sites using distinct acquisition devices, providing a challenging evaluation setting. During the scans, binaural auditory stimuli were presented with randomized inter-stimulus intervals (ISI), and participants were instructed to press a button as soon as they heard each stimulus.

The task-based dataset comprises 16 scans from 10 healthy subjects, including 9 training scans, 3 validation scans, and 4 test scans. For a detailed description of the dataset and experimental protocol, we refer the reader to \cite{NEURIPS2024_Li}. Using this dataset, we conduct three types of unseen-subject scan prediction experiments:
(1) zero-shot transfer, in which the model is trained exclusively on resting-state data and directly evaluated on task-condition data;
(2) fine-tuning, in which the pretrained model is further fine-tuned on the task-condition training set prior to evaluation; and
(3) training from scratch using task-condition data.

Due to differences in EEG channel configurations between the two datasets, 
we use only the 23 EEG channels that are shared across both datasets as model input. As shown in Table~\ref{tab:zeroshot_finetune_comparison}, the model demonstrates strong generalization to data collected under a different experimental paradigm and with different acquisition hardware, despite using only a subset of the channels available in the full model configuration. Moreover, fine-tuning on the task-specific training data further improves performance across most evaluation metrics.

\begin{table}[h]
\caption{Task-condition unseen scan reconstruction performance (P=64), reported as Mean{\scriptsize(std)} with best mean values in \textbf{bold}.}
\centering
\scriptsize
\begin{tabular}{l c c c c c c}
\toprule
\textbf{Approach} & \textbf{Pretrain} & \textbf{Tcorr} & \textbf{Pixcorr} & \textbf{ConnMSE} & \textbf{F1-0.25} & \textbf{F1-0.50} \\
\midrule
Zero-shot
& \checkmark
& 0.377{\scriptsize(0.023)}
& 0.352{\scriptsize(0.148)}
& 0.093{\scriptsize(0.040)}
& \textbf{0.480}{\scriptsize(0.070)}
& 0.656{\scriptsize(0.048)} \\

Fine-tune
& \checkmark
& \textbf{0.406}{\scriptsize(0.083)}
& \textbf{0.421}{\scriptsize(0.220)}
& \textbf{0.086}{\scriptsize(0.030)}
& 0.472{\scriptsize(0.052)}
& \textbf{0.666}{\scriptsize(0.095)} \\

From scratch
& \XSolidBrush
& 0.379{\scriptsize(0.071)}
& 0.352{\scriptsize(0.237)}
& 0.098{\scriptsize(0.028)}
& 0.386{\scriptsize(0.041)}
& 0.628{\scriptsize(0.088)} \\
\bottomrule
\end{tabular}
\label{tab:zeroshot_finetune_comparison}
\end{table}




\section{Discussion and Conclusion}
% Chang Li rewrote this part since it overflows a single line.
We propose UnEBOLT, 
a \textbf{Un}ified, \textbf{E}EG-to-\textbf{BOL}D \textbf{T}ranslation model
designed for efficient end-to-end reconstruction of fMRI signals and functional connectivity from EEG. 
% a \textbf{Un}ified, end-to-end model 
% for \textbf{E}EG-to-\textbf{B}OLD \textbf{T}ranslation, 
Unlike existing ROI-specific models, UnEBOLT leverages an adaptive ROI-specific representation learning mechanism in a multi-region joint learning framework,
%that enables joint learning across multiple brain regions, 
improving training efficiency and scalability. In our experiments with the eyes-closed resting-state dataset, UnEBOLT demonstrates superior performance for whole-brain fMRI time series prediction and functional connectivity reconstruction in both intrascan and unseen subject settings compared with baselines. Furthermore, the model exhibits promising generalization capability when transferred to an unseen task-condition dataset. While the current study primarily focuses on eyes-closed resting-state EEG-fMRI synthesis, future work will extend the evaluation to a broader range of task-based paradigms as additional task-based EEG-fMRI datasets become available.

The strength of agreement between our predictions and ground-truth fMRI signals underscores UnEBOLT’s reliability and highlights its promise as a cost-effective, end-to-end tool for inferring fMRI from EEG. This unified approach opens new possibilities for large-scale brain research and future downstream applications in cognitive neuroscience, clinical diagnostics, and brain–computer interfaces.







\clearpage  % Acknowledgements, references, and appendix do not count toward the page limit (if any)
% Acknowledgments---Will not appear in anonymized version
\midlacknowledgments{We are grateful for support from the Sally and Dave Hopkins Faculty Fellowship and from NIH grants R01NS112252, P50MH109429, T32EB021937, and F31NS143413.}


\bibliography{midl26_194}
\appendix
% \input{appendix}
% \appendix
\label{appendix}

\section{Subject-wise Results}

\begin{figure}[h]
\includegraphics[width=\textwidth]{figures/appendix_distribution_fig.png}
\caption{Intrascan (left) and unseen subject (right) prediction performance distribution. Bars: mean values; error bars: S.D. Each point represents one scan.}
\label{appx_fig_dist}
\end{figure}


\begin{table}[h]
\caption{Scan-level performance (P=64) in unseen subject fMRI synthesis (N=6 scans). The last row reports Mean{\scriptsize(std)} across scans.}
\centering
\scriptsize
\begin{tabular}{l c c c c c}
\toprule
\textbf{Subject ID} & \textbf{Tcorr} & \textbf{Pixcorr} & \textbf{ConnMSE} & \textbf{F1-0.25} & \textbf{F1-0.50} \\
\midrule
sub07-scan01 & 0.460 & 0.506 & 0.068 & 0.470 & 0.661 \\
sub07-scan02 & 0.416 & 0.358 & 0.123 & 0.478 & 0.644 \\
sub11-scan01 & 0.437 & 0.710 & 0.050 & 0.665 & 0.767 \\
sub16-scan01 & 0.442 & 0.515 & 0.211 & 0.510 & 0.661 \\
sub18-scan01 & 0.365 & 0.623 & 0.029 & 0.583 & 0.684 \\
sub21-scan01 & 0.389 & 0.579 & 0.069 & 0.563 & 0.726 \\
\midrule
\textbf{Mean(std)} &
\textbf{0.418{\scriptsize(0.036)}} &
\textbf{0.549{\scriptsize(0.120)}} &
\textbf{0.092{\scriptsize(0.066)}} &
\textbf{0.545{\scriptsize(0.074)}} &
\textbf{0.690{\scriptsize(0.047)}} \\
\bottomrule
\end{tabular}
\label{tab:subject_level_stats}
\end{table}





\end{document}
